Things You (Might) Forget about R
Totally unrelated notes
I do forget a lot. One is not batman.
pivot
Pivot data longer while re-coding some similar column names into the same one
https://stackoverflow.com/questions/65371189/pivot-longer-by-group-prefix
<- data.frame(
df date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each = 6),
k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
v = runif(30,0,50)
%>%
) pivot_wider(names_from = k, values_from = v)
# df.head() in python / or use view(df) in r
head(df) %>% knitr::kable()
date | A.mean | A.median | A.min | B.mean | B.median | B.min |
---|---|---|---|---|---|---|
2020-01-01 | 48.732622 | 42.63542 | 48.28532 | 26.983021 | 3.189964 | 3.841179 |
2020-01-02 | 1.283834 | 46.04468 | 42.86398 | 45.522767 | 49.894911 | 38.187536 |
2020-01-03 | 6.784811 | 9.45290 | 30.71775 | 47.538903 | 49.314453 | 1.333299 |
2020-01-04 | 32.269100 | 27.61920 | 14.12190 | 24.186885 | 48.072475 | 43.217502 |
2020-01-05 | 32.884956 | 24.21991 | 21.60856 | 3.824533 | 36.382598 | 26.704129 |
<- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", "stat"))
temp
<- pivot_wider(temp, id_cols = c("date", "k"), names_from = "stat", values_from = "value")
answer
%>%
df pivot_longer(-date, names_sep = "\\.", names_to = c("k", "stat")) %>%
pivot_wider(id_cols = c("date", "k"), names_from = "stat", values_from = "value") %>%
::kable() knitr
date | k | mean | median | min |
---|---|---|---|---|
2020-01-01 | A | 48.732622 | 42.635422 | 48.285325 |
2020-01-01 | B | 26.983021 | 3.189964 | 3.841179 |
2020-01-02 | A | 1.283834 | 46.044676 | 42.863984 |
2020-01-02 | B | 45.522767 | 49.894911 | 38.187536 |
2020-01-03 | A | 6.784811 | 9.452900 | 30.717747 |
2020-01-03 | B | 47.538903 | 49.314453 | 1.333299 |
2020-01-04 | A | 32.269100 | 27.619201 | 14.121901 |
2020-01-04 | B | 24.186885 | 48.072475 | 43.217502 |
2020-01-05 | A | 32.884956 | 24.219914 | 21.608557 |
2020-01-05 | B | 3.824533 | 36.382598 | 26.704129 |
mutate_at
many columns (with a naming pattern) subtract a same column
<- tribble(
tb ~x, ~y1, ~y2, ~y3, ~z,
1,2,4,6,2,
2,1,2,3,3,
3,6,4,2,1
)
head(tb) %>% knitr::kable()
x | y1 | y2 | y3 | z |
---|---|---|---|---|
1 | 2 | 4 | 6 | 2 |
2 | 1 | 2 | 3 | 3 |
3 | 6 | 4 | 2 | 1 |
overwrite
# overwrite
%>% mutate_at(vars(starts_with("y")), funs(. - z)) %>% knitr::kable() tb
x | y1 | y2 | y3 | z |
---|---|---|---|---|
1 | 0 | 2 | 4 | 2 |
2 | -2 | -1 | 0 | 3 |
3 | 5 | 3 | 1 | 1 |
!overwrite
# new cols
%>% mutate_at(vars(starts_with("y")), funs(mod = . - z)) %>% knitr::kable() tb
x | y1 | y2 | y3 | z | y1_mod | y2_mod | y3_mod |
---|---|---|---|---|---|---|---|
1 | 2 | 4 | 6 | 2 | 0 | 2 | 4 |
2 | 1 | 2 | 3 | 3 | -2 | -1 | 0 |
3 | 6 | 4 | 2 | 1 | 5 | 3 | 1 |
T Test by Group (with multiple categories)
library('tidyverse')
library('broom')
set.seed(354654)
= tibble(value = rnorm(100),
d category = sample(1:5, replace = TRUE, 100),
group = sample(c('A', 'B'), replace = TRUE, 100)) %>%
arrange(category)
%>%
d group_by(category, group) %>%
nest() %>%
spread(key = group, value = data) %>%
mutate(
t_test = map2(A, B, ~{t.test(.x$value, .y$value) %>% tidy()}),
A = map(A, nrow),
B = map(B, nrow)
%>%
) unnest()
## # A tibble: 5 x 13
## # Groups: category [5]
## category A B estimate estimate1 estimate2 statistic p.value parameter
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 10 10 -0.0625 0.374 0.437 -0.184 0.856 18.0
## 2 2 7 6 0.126 0.0553 -0.0709 0.375 0.717 7.98
## 3 3 8 9 -0.497 -0.436 0.0611 -0.881 0.395 12.6
## 4 4 15 11 0.157 0.222 0.0641 0.408 0.687 22.5
## 5 5 11 13 -0.0798 -0.297 -0.217 -0.190 0.851 21.3
## # ... with 4 more variables: conf.low <dbl>, conf.high <dbl>, method <chr>,
## # alternative <chr>
heatmap data
convert columns to rownames before making heatmap
before
= tibble(value = rnorm(5),
df category = sample(1:5, replace = TRUE, 5),
group = sample(c('A', 'B', 'C', 'D', 'E'))) %>%
arrange(category) %>%
select(group, everything())
head(df, n=3) %>% knitr::kable()
group | value | category |
---|---|---|
A | 0.9582820 | 1 |
B | -0.8025585 | 1 |
C | -0.3725190 | 3 |
after
= df %>%
df_heatmap remove_rownames() %>%
column_to_rownames(var = 'group')
head(df_heatmap, n=3) %>%
::kable() knitr
value | category | |
---|---|---|
A | 0.9582820 | 1 |
B | -0.8025585 | 1 |
C | -0.3725190 | 3 |
dropping NAs & 0s
replace 0 with NA
= data.frame(a = 1:6, b = 1:0, c = 0:5)
df_withzero
head(df_withzero)
## a b c
## 1 1 1 0
## 2 2 0 1
## 3 3 1 2
## 4 4 0 3
## 5 5 1 4
## 6 6 0 5
sol.1
==0] <- NA
df_withzero[df_withzero
df_withzero
## a b c
## 1 1 1 NA
## 2 2 NA 1
## 3 3 1 2
## 4 4 NA 3
## 5 5 1 4
## 6 6 NA 5
sol.2
= data.frame(a = 1:6, b = 1:0, c = 0:5)
df_withzero is.na(df_withzero) <- !df_withzero
df_withzero
## a b c
## 1 1 1 NA
## 2 2 NA 1
## 3 3 1 2
## 4 4 NA 3
## 5 5 1 4
## 6 6 NA 5
sol.3
<- na_if(df_withzero, 0)
df_withzero
df_withzero
## a b c
## 1 1 1 NA
## 2 2 NA 1
## 3 3 1 2
## 4 4 NA 3
## 5 5 1 4
## 6 6 NA 5
sol.4
library(dplyr)
<- df_withzero %>% mutate_all(~na_if(., 0))
df_withzero
df_withzero
## a b c
## 1 1 1 NA
## 2 2 NA 1
## 3 3 1 2
## 4 4 NA 3
## 5 5 1 4
## 6 6 NA 5
then, you can use drop_na()
%>%
df_withzero drop_na()
## a b c
## 1 3 1 2
## 2 5 1 4
advanced mutate_all/if/at + select
#Example 6 - Replace using dplyr::mutate_all()
library(dplyr)
<- df %>% mutate_all(~na_if(., 0)))
df
#Example 7 - Replace only on all Numeric columns
library(dplyr)
<- df %>% mutate_if(is.numeric, ~na_if(., 0))
df
#Example 8 - Replace only on selected columns
library(dplyr)
<- df %>% mutate_at(c('pages'), ~na_if(., 0)) df