4.4 Applications

Applications copied from cohort 2 slide

4.4.1 Lookup tables (character subsetting)

# Character subsetting as a lookup table: each code in x is matched against
# the names of lookup, and the corresponding label is returned.
x <- c("m", "f", "u", "f", "f", "m", "m")
# "u" maps to NA, so those codes come back as missing
lookup <- c(m = "Male", f = "Female", u = NA)
# The result keeps the codes as names (see printed output below)
lookup[x]
#        m        f        u        f        f        m        m 
#   "Male" "Female"       NA "Female" "Female"   "Male"   "Male"

4.4.2 Matching and merging by hand (integer subsetting)

  • The match() function allows merging a vector with a table
# Merge by hand: for each element of grades, match() returns the position of
# its first match in info$grade (NA if there is none).
grades <- c("D", "A", "C", "B", "F")
info <- data.frame(
  grade = c("A", "B", "C", "D", "F"),
  desc = c("Excellent", "Very Good", "Average", "Fair", "Poor"),
  # Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned
  fail = c(FALSE, FALSE, FALSE, FALSE, TRUE)
)
id <- match(grades, info$grade)
id
# [1] 4 1 3 2 5
# Integer subsetting with id returns the rows of info in the order of grades
info[id, ]
#   grade      desc  fail
# 4     D      Fair FALSE
# 1     A Excellent FALSE
# 3     C   Average FALSE
# 2     B Very Good FALSE
# 5     F      Poor  TRUE

4.4.3 Random samples and bootstrapping (integer subsetting)

# mtcars[sample(nrow(mtcars), 3), ] # add replace = TRUE to sample with replacement (bootstrap)
#                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
# Lotus Europa       30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
# Mazda RX4          21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
# Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4

4.4.4 Ordering (integer subsetting)

# mtcars[order(mtcars$mpg), ]
#                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
# Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
# Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
# Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
# Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
# Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
# Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
# ...

4.4.5 Expanding aggregated counts (integer subsetting)

  • We can expand a count column by using rep()
# Expand aggregated counts: repeat each row index n times, then subset the
# rows with the resulting integer vector.
df <- tibble::tibble(x = c("Amy", "Julie", "Brian"), n = c(2, 1, 3))
# seq_len() instead of 1:nrow(df): with zero rows, 1:0 would yield c(1, 0)
df[rep(seq_len(nrow(df)), df$n), ]
# A tibble: 6 x 2
#   x         n
#   <chr> <dbl>
# 1 Amy       2
# 2 Amy       2
# 3 Julie     1
# 4 Brian     3
# 5 Brian     3
# 6 Brian     3

4.4.6 Removing columns from data frames (character)

  • We can drop a column by subsetting to the columns we want to keep; this returns a new object and leaves df unchanged
# Keep a column by name (character subsetting) rather than by position, so the
# selection does not depend on column order; the result is not assigned, so df
# is not modified.
df[, "x"]
# A tibble: 3 x 1
#   x    
#   <chr>
# 1 Amy  
# 2 Julie
# 3 Brian
  • We can also delete the column using NULL
# Assigning NULL to a column removes it; this modifies df itself
df$n <- NULL
# Only column x remains
df
# A tibble: 3 x 1
#   x    
#   <chr>
# 1 Amy  
# 2 Julie
# 3 Brian

4.4.7 Selecting rows based on a condition (logical subsetting)

# mtcars[mtcars$gear == 5, ]
#                 mpg cyl  disp  hp drat    wt qsec vs am gear carb
# Porsche 914-2  26.0   4 120.3  91 4.43 2.140 16.7  0  1    5    2
# Lotus Europa   30.4   4  95.1 113 3.77 1.513 16.9  1  1    5    2
# Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.5  0  1    5    4
# Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.5  0  1    5    6
# Maserati Bora  15.0   8 301.0 335 3.54 3.570 14.6  0  1    5    8

4.4.8 Boolean algebra versus sets (logical and integer)

  • which() converts a logical (Boolean) vector into the integer positions of its TRUE elements
# Wrapping an assignment in parentheses also prints the assigned value.
(x1 <- 1:10 %% 2 == 0) # logical: which of 1-10 are divisible by 2
#  [1] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE
(x2 <- which(x1)) # integer positions where x1 is TRUE
# [1]  2  4  6  8 10
(y1 <- 1:10 %% 5 == 0) # logical: which of 1-10 are divisible by 5
#  [1] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE
(y2 <- which(y1)) # integer positions where y1 is TRUE
# [1]  5 10
# Elementwise AND: TRUE only where a number is divisible by both 2 and 5
x1 & y1
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE