3.9 Digestif

Let us use some of this chapter’s skills on the penguins data.

3.9.1 Attributes

str(penguins_raw)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#>  $ studyName          : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#>  $ Sample Number      : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#>  $ Species            : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#>  $ Region             : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#>  $ Island             : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#>  $ Stage              : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#>  $ Individual ID      : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#>  $ Clutch Completion  : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#>  $ Date Egg           : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#>  $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#>  $ Culmen Depth (mm)  : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#>  $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#>  $ Body Mass (g)      : num [1:344] 3750 3800 3250 NA 3450 ...
#>  $ Sex                : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#>  $ Delta 15 N (o/oo)  : num [1:344] NA 8.95 8.37 NA 8.77 ...
#>  $ Delta 13 C (o/oo)  : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#>  $ Comments           : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
#>  - attr(*, "spec")=List of 3
#>   ..$ cols   :List of 17
#>   .. ..$ studyName          : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Sample Number      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Species            : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Region             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Island             : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Stage              : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Individual ID      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Clutch Completion  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Date Egg           :List of 1
#>   .. .. ..$ format: chr ""
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#>   .. ..$ Culmen Length (mm) : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Culmen Depth (mm)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Flipper Length (mm): list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Body Mass (g)      : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Sex                : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   .. ..$ Delta 15 N (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Delta 13 C (o/oo)  : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#>   .. ..$ Comments           : list()
#>   .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#>   ..$ default: list()
#>   .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#>   ..$ skip   : num 1
#>   ..- attr(*, "class")= chr "col_spec"
str(penguins_raw, give.attr = FALSE)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#>  $ studyName          : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#>  $ Sample Number      : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#>  $ Species            : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#>  $ Region             : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#>  $ Island             : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#>  $ Stage              : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#>  $ Individual ID      : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#>  $ Clutch Completion  : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#>  $ Date Egg           : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#>  $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#>  $ Culmen Depth (mm)  : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#>  $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#>  $ Body Mass (g)      : num [1:344] 3750 3800 3250 NA 3450 ...
#>  $ Sex                : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#>  $ Delta 15 N (o/oo)  : num [1:344] NA 8.95 8.37 NA 8.77 ...
#>  $ Delta 13 C (o/oo)  : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#>  $ Comments           : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...

3.9.2 Data Frames vs Tibbles

penguins_df <- data.frame(penguins)
penguins_tb <- penguins #i.e. penguins was already a tibble

3.9.2.1 Printing

  • Tip: print out these results in RStudio under different editor themes
print(penguins_df) #don't run this
head(penguins_df)
#>   species    island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
#> 1  Adelie Torgersen           39.1          18.7               181        3750
#> 2  Adelie Torgersen           39.5          17.4               186        3800
#> 3  Adelie Torgersen           40.3          18.0               195        3250
#> 4  Adelie Torgersen             NA            NA                NA          NA
#> 5  Adelie Torgersen           36.7          19.3               193        3450
#> 6  Adelie Torgersen           39.3          20.6               190        3650
#>      sex year
#> 1   male 2007
#> 2 female 2007
#> 3 female 2007
#> 4   <NA> 2007
#> 5 female 2007
#> 6   male 2007
penguins_tb
#> # A tibble: 344 × 8
#>    species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
#>    <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
#>  1 Adelie  Torgersen           39.1          18.7               181        3750
#>  2 Adelie  Torgersen           39.5          17.4               186        3800
#>  3 Adelie  Torgersen           40.3          18                 195        3250
#>  4 Adelie  Torgersen           NA            NA                  NA          NA
#>  5 Adelie  Torgersen           36.7          19.3               193        3450
#>  6 Adelie  Torgersen           39.3          20.6               190        3650
#>  7 Adelie  Torgersen           38.9          17.8               181        3625
#>  8 Adelie  Torgersen           39.2          19.6               195        4675
#>  9 Adelie  Torgersen           34.1          18.1               193        3475
#> 10 Adelie  Torgersen           42            20.2               190        4250
#> # ℹ 334 more rows
#> # ℹ 2 more variables: sex <fct>, year <int>

3.9.3 Atomic Vectors

species_vector_df <- penguins_df |> select(species)
species_unlist_df <- penguins_df |> select(species) |> unlist()
species_pull_df   <- penguins_df |> select(species) |> pull()

species_vector_tb <- penguins_tb |> select(species)
species_unlist_tb <- penguins_tb |> select(species) |> unlist()
species_pull_tb   <- penguins_tb |> select(species) |> pull()
typeof() and class()
typeof(species_vector_df)
#> [1] "list"
class(species_vector_df)
#> [1] "data.frame"

typeof(species_unlist_df)
#> [1] "integer"
class(species_unlist_df)
#> [1] "factor"

typeof(species_pull_df)
#> [1] "integer"
class(species_pull_df)
#> [1] "factor"

typeof(species_vector_tb)
#> [1] "list"
class(species_vector_tb)
#> [1] "tbl_df"     "tbl"        "data.frame"

typeof(species_unlist_tb)
#> [1] "integer"
class(species_unlist_tb)
#> [1] "factor"

typeof(species_pull_tb)
#> [1] "integer"
class(species_pull_tb)
#> [1] "factor"

3.9.4 Column Names

colnames(penguins_tb)
#> [1] "species"           "island"            "bill_length_mm"   
#> [4] "bill_depth_mm"     "flipper_length_mm" "body_mass_g"      
#> [7] "sex"               "year"
names(penguins_tb) == colnames(penguins_tb)
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
names(penguins_df) == names(penguins_tb)
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
  • What if we only invoke a partial name of a column of a tibble?
penguins_tb$y 
#> Warning: Unknown or uninitialised column: `y`.
#> NULL
Tibbles are surly! `$` never partially matches column names on a tibble, so `y` is not resolved to `year`.
  • What if we only invoke a partial name of a column of a data frame?
head(penguins_df$y) #instead of `year`
#> [1] 2007 2007 2007 2007 2007 2007
  • Is this evaluation in alphabetical order or column order?
penguins_df_se_sp <- penguins_df |> select(sex, species)
penguins_df_sp_se <- penguins_df |> select(species, sex)
head(penguins_df_se_sp$s)
#> NULL
head(penguins_df_sp_se$s)
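#> NULL
  • Neither: `s` is a prefix of both `sex` and `species`, so the partial match is ambiguous and `$` returns NULL regardless of column order. Partial matching only succeeds when the prefix identifies exactly one column.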