2.5 Objects and Containers in R

Characters and data frames

# World Series results, one element per season from 2008 through 2017.
Year <- 2008:2017

# Team codes of the National League and American League participants,
# in season order.
NL <- c(
  "PHI", "PHI", "SFN", "SLN", "SFN",  # 2008-2012
  "SLN", "SFN", "NYN", "CHN", "LAN"   # 2013-2017
)
AL <- c(
  "TBA", "NYA", "TEX", "TEX", "DET",  # 2008-2012
  "BOS", "KCA", "KCA", "CLE", "HOU"   # 2013-2017
)

# League of the winning team in each season.
Winner <- c(
  "NL", "AL", "NL", "NL", "NL",  # 2008-2012
  "AL", "NL", "AL", "NL", "AL"   # 2013-2017
)

# Number of games played in each series (stored as doubles).
# NOTE(review): the 2013 entry (sixth value) is 7; the official record
# may list that series at 6 games -- confirm before relying on it.
N_Games <- c(
  5, 6, 5, 7, 4,  # 2008-2012
  7, 7, 5, 7, 7   # 2013-2017
)

# Assemble the season-level vectors into a single tibble. Bare arguments
# keep their own names as column names; NL and AL are renamed on the way in.
WS_results <- tibble::tibble(
  Year,
  NL_Team = NL,
  AL_Team = AL,
  N_Games,
  Winner
)

# Print the tibble: one row per season, five columns.
WS_results
## # A tibble: 10 × 5
##     Year NL_Team AL_Team N_Games Winner
##    <int> <chr>   <chr>     <dbl> <chr> 
##  1  2008 PHI     TBA           5 NL    
##  2  2009 PHI     NYA           6 AL    
##  3  2010 SFN     TEX           5 NL    
##  4  2011 SLN     TEX           7 NL    
##  5  2012 SFN     DET           4 NL    
##  6  2013 SLN     BOS           7 AL    
##  7  2014 SFN     KCA           7 NL    
##  8  2015 NYN     KCA           5 AL    
##  9  2016 CHN     CLE           7 NL    
## 10  2017 LAN     HOU           7 AL
# Tally how many Series each league won: one row per value of Winner,
# with the row count stored in column N.
WS <- dplyr::count(WS_results, Winner, name = "N")

WS
## # A tibble: 2 × 2
##   Winner     N
##   <chr>  <int>
## 1 AL         4
## 2 NL         6
# Bar chart of the per-league win counts held in WS.
WS |>
  ggplot2::ggplot(mapping = ggplot2::aes(x = Winner, y = N)) +
  ggplot2::geom_col()

Factors

# NL_Team is still a character column at this point, so the per-team
# counts come out in alphabetical order.
dplyr::count(WS_results, NL_Team, name = "N")
## # A tibble: 6 × 2
##   NL_Team     N
##   <chr>   <int>
## 1 CHN         1
## 2 LAN         1
## 3 NYN         1
## 4 PHI         2
## 5 SFN         3
## 6 SLN         2
# Replace NL_Team with a factor whose levels are listed in division order,
# so that grouped output follows the level order rather than the alphabet.
WS_results$NL_Team <- factor(
  WS_results$NL_Team,
  levels = c("NYN", "PHI", "CHN", "SLN", "LAN", "SFN")
)

# With NL_Team stored as a factor, the per-team counts follow the factor's
# level order (by division) instead of alphabetical order.
dplyr::count(WS_results, NL_Team, name = "N")
## # A tibble: 6 × 2
##   NL_Team     N
##   <fct>   <int>
## 1 NYN         1
## 2 PHI         2
## 3 CHN         1
## 4 SLN         2
## 5 LAN         1
## 6 SFN         3

Lists

# A list can hold elements of different types and lengths: here two
# length-10 vectors, followed by a single descriptive string.
world_series <- list(Winner = Winner, Number_Games = N_Games)
world_series$Seasons <- "2008 to 2017"

# Printing the list shows each named element in turn.
world_series
## $Winner
##  [1] "NL" "AL" "NL" "NL" "NL" "AL" "NL" "AL" "NL" "AL"
## 
## $Number_Games
##  [1] 5 6 5 7 4 7 7 5 7 7
## 
## $Seasons
## [1] "2008 to 2017"

There are several ways to extract an element from a list or a column from a data frame:

# `$` extracts a single list element by name.
world_series$Number_Games
##  [1] 5 6 5 7 4 7 7 5 7 7
# `[[` extracts a single element, here by position (the second element).
world_series[[2]]
##  [1] 5 6 5 7 4 7 7 5 7 7
# purrr::pluck() extracts the same element by name.
purrr::pluck(world_series, "Number_Games")
##  [1] 5 6 5 7 4 7 7 5 7 7
# Single-bracket `[` keeps the list wrapper: the result is a one-element
# list (note the `$Number_Games` header in the output), not the bare vector.
world_series["Number_Games"]
## $Number_Games
##  [1] 5 6 5 7 4 7 7 5 7 7
# `$` on the tibble returns the column as a vector (a factor here).
WS_results$NL_Team
##  [1] PHI PHI SFN SLN SFN SLN SFN NYN CHN LAN
## Levels: NYN PHI CHN SLN LAN SFN
# dplyr::pull() returns the same column vector as `$` above.
dplyr::pull(WS_results, NL_Team)
##  [1] PHI PHI SFN SLN SFN SLN SFN NYN CHN LAN
## Levels: NYN PHI CHN SLN LAN SFN