Things You (Might) Forget about R

Totally unrelated notes

I do forget a lot. One is not batman.

pivot

Pivot data longer while re-coding some similar column names into the same one

https://stackoverflow.com/questions/65371189/pivot-longer-by-group-prefix

df <- data.frame(
   date = rep(seq(as.Date("2020-01-01"),as.Date("2020-01-05"),by="day"),each = 6),
   k = rep(c("A.mean","A.median","A.min","B.mean","B.median","B.min"),5),
   v = runif(30,0,50)
) %>%
   pivot_wider(names_from = k, values_from = v)

# df.head() in python / or use view(df) in r
head(df) %>% knitr::kable()
date A.mean A.median A.min B.mean B.median B.min
2020-01-01 48.732622 42.63542 48.28532 26.983021 3.189964 3.841179
2020-01-02 1.283834 46.04468 42.86398 45.522767 49.894911 38.187536
2020-01-03 6.784811 9.45290 30.71775 47.538903 49.314453 1.333299
2020-01-04 32.269100 27.61920 14.12190 24.186885 48.072475 43.217502
2020-01-05 32.884956 24.21991 21.60856 3.824533 36.382598 26.704129
temp <- pivot_longer(df, -date, names_sep = "\\.", names_to = c("k", "stat"))

answer <- pivot_wider(temp, id_cols = c("date", "k"), names_from = "stat", values_from = "value")

df %>% 
  pivot_longer(-date, names_sep = "\\.", names_to = c("k", "stat")) %>% 
  pivot_wider(id_cols = c("date", "k"), names_from = "stat", values_from = "value") %>% 
  knitr::kable()
date k mean median min
2020-01-01 A 48.732622 42.635422 48.285325
2020-01-01 B 26.983021 3.189964 3.841179
2020-01-02 A 1.283834 46.044676 42.863984
2020-01-02 B 45.522767 49.894911 38.187536
2020-01-03 A 6.784811 9.452900 30.717747
2020-01-03 B 47.538903 49.314453 1.333299
2020-01-04 A 32.269100 27.619201 14.121901
2020-01-04 B 24.186885 48.072475 43.217502
2020-01-05 A 32.884956 24.219914 21.608557
2020-01-05 B 3.824533 36.382598 26.704129

mutate_at

many columns (with a naming pattern) subtract a same column

https://stackoverflow.com/questions/48898121/mutate-multiple-variable-to-create-multiple-new-variables/

tb <- tribble(
  ~x, ~y1, ~y2, ~y3, ~z,
  1,2,4,6,2,
  2,1,2,3,3,
  3,6,4,2,1
)

head(tb) %>% knitr::kable()
x y1 y2 y3 z
1 2 4 6 2
2 1 2 3 3
3 6 4 2 1

overwrite

# overwrite
tb %>% mutate_at(vars(starts_with("y")), funs(. - z)) %>% knitr::kable()
x y1 y2 y3 z
1 0 2 4 2
2 -2 -1 0 3
3 5 3 1 1

!overwrite

# new cols
tb %>% mutate_at(vars(starts_with("y")), funs(mod = . - z)) %>% knitr::kable()
x y1 y2 y3 z y1_mod y2_mod y3_mod
1 2 4 6 2 0 2 4
2 1 2 3 3 -2 -1 0
3 6 4 2 1 5 3 1

T Test by Group (with multiple categories)

https://community.rstudio.com/t/use-dplyr-to-do-grouped-t-tests-and-get-number-of-observations-simultanously/23561

library('tidyverse')
library('broom')
set.seed(354654)
d = tibble(value = rnorm(100),
           category = sample(1:5, replace = TRUE, 100),
           group = sample(c('A', 'B'), replace = TRUE, 100)) %>% 
  arrange(category)

d %>% 
  group_by(category, group) %>% 
  nest() %>% 
  spread(key = group, value = data) %>% 
  mutate(
    t_test = map2(A, B, ~{t.test(.x$value, .y$value) %>% tidy()}),
    A = map(A, nrow),
    B = map(B, nrow)
  ) %>% 
  unnest()
## # A tibble: 5 x 13
## # Groups:   category [5]
##   category     A     B estimate estimate1 estimate2 statistic p.value parameter
##      <int> <int> <int>    <dbl>     <dbl>     <dbl>     <dbl>   <dbl>     <dbl>
## 1        1    10    10  -0.0625    0.374     0.437     -0.184   0.856     18.0 
## 2        2     7     6   0.126     0.0553   -0.0709     0.375   0.717      7.98
## 3        3     8     9  -0.497    -0.436     0.0611    -0.881   0.395     12.6 
## 4        4    15    11   0.157     0.222     0.0641     0.408   0.687     22.5 
## 5        5    11    13  -0.0798   -0.297    -0.217     -0.190   0.851     21.3 
## # ... with 4 more variables: conf.low <dbl>, conf.high <dbl>, method <chr>,
## #   alternative <chr>

heatmap data

convert columns to rownames before making heatmap

before

df = tibble(value = rnorm(5),
           category = sample(1:5, replace = TRUE, 5),
           group = sample(c('A', 'B', 'C', 'D', 'E'))) %>% 
  arrange(category) %>% 
  select(group, everything())

head(df, n=3) %>% knitr::kable()
group value category
A 0.9582820 1
B -0.8025585 1
C -0.3725190 3

after

df_heatmap = df %>% 
  remove_rownames() %>% 
  column_to_rownames(var = 'group')

head(df_heatmap, n=3) %>% 
  knitr::kable()
value category
A 0.9582820 1
B -0.8025585 1
C -0.3725190 3

dropping NAs & 0s

replace 0 with NA

df_withzero = data.frame(a = 1:6, b = 1:0, c = 0:5)

head(df_withzero)
##   a b c
## 1 1 1 0
## 2 2 0 1
## 3 3 1 2
## 4 4 0 3
## 5 5 1 4
## 6 6 0 5

sol.1

df_withzero[df_withzero==0] <- NA


df_withzero
##   a  b  c
## 1 1  1 NA
## 2 2 NA  1
## 3 3  1  2
## 4 4 NA  3
## 5 5  1  4
## 6 6 NA  5

sol.2

df_withzero = data.frame(a = 1:6, b = 1:0, c = 0:5)
is.na(df_withzero) <- !df_withzero

df_withzero
##   a  b  c
## 1 1  1 NA
## 2 2 NA  1
## 3 3  1  2
## 4 4 NA  3
## 5 5  1  4
## 6 6 NA  5

sol.3

df_withzero <- na_if(df_withzero, 0)


df_withzero
##   a  b  c
## 1 1  1 NA
## 2 2 NA  1
## 3 3  1  2
## 4 4 NA  3
## 5 5  1  4
## 6 6 NA  5

sol.4

library(dplyr) 
df_withzero <- df_withzero %>% mutate_all(~na_if(., 0))

df_withzero
##   a  b  c
## 1 1  1 NA
## 2 2 NA  1
## 3 3  1  2
## 4 4 NA  3
## 5 5  1  4
## 6 6 NA  5

then, you can use drop_na()

df_withzero %>% 
  drop_na()
##   a b c
## 1 3 1 2
## 2 5 1 4

advanced mutate_all/if/at + select

REF

#Example 6 - Replace using dplyr::mutate_all() 
library(dplyr) 
df <- df %>% mutate_all(~na_if(., 0)))

#Example 7 - Replace only on all Numeric columns
library(dplyr) 
df <- df %>% mutate_if(is.numeric, ~na_if(., 0))

#Example 8 - Replace only on selected columns
library(dplyr) 
df <- df %>% mutate_at(c('pages'), ~na_if(., 0))