3.9 Digestif
Let us use some of this chapter’s skills on the penguins
data.
3.9.1 Attributes
# Full structure dump: the columns come first, followed by the attributes
# attached to the object (here a "spec" attribute of class "col_spec").
str(penguins_raw)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#> $ studyName : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#> $ Sample Number : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#> $ Species : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#> $ Region : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#> $ Island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#> $ Stage : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#> $ Individual ID : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#> $ Clutch Completion : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#> $ Date Egg : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#> $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#> $ Culmen Depth (mm) : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#> $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#> $ Body Mass (g) : num [1:344] 3750 3800 3250 NA 3450 ...
#> $ Sex : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#> $ Delta 15 N (o/oo) : num [1:344] NA 8.95 8.37 NA 8.77 ...
#> $ Delta 13 C (o/oo) : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#> $ Comments : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
#> - attr(*, "spec")=List of 3
#> ..$ cols :List of 17
#> .. ..$ studyName : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Sample Number : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Species : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Region : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Island : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Stage : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Individual ID : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Clutch Completion : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Date Egg :List of 1
#> .. .. ..$ format: chr ""
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_date" "collector"
#> .. ..$ Culmen Length (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Culmen Depth (mm) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Flipper Length (mm): list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Body Mass (g) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Sex : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> .. ..$ Delta 15 N (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Delta 13 C (o/oo) : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
#> .. ..$ Comments : list()
#> .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
#> ..$ default: list()
#> .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
#> ..$ skip : num 1
#> ..- attr(*, "class")= chr "col_spec"
# give.attr = FALSE drops the attribute listing, so only the columns are shown.
str(penguins_raw, give.attr = FALSE)
#> tibble [344 × 17] (S3: tbl_df/tbl/data.frame)
#> $ studyName : chr [1:344] "PAL0708" "PAL0708" "PAL0708" "PAL0708" ...
#> $ Sample Number : num [1:344] 1 2 3 4 5 6 7 8 9 10 ...
#> $ Species : chr [1:344] "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" "Adelie Penguin (Pygoscelis adeliae)" ...
#> $ Region : chr [1:344] "Anvers" "Anvers" "Anvers" "Anvers" ...
#> $ Island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
#> $ Stage : chr [1:344] "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" "Adult, 1 Egg Stage" ...
#> $ Individual ID : chr [1:344] "N1A1" "N1A2" "N2A1" "N2A2" ...
#> $ Clutch Completion : chr [1:344] "Yes" "Yes" "Yes" "Yes" ...
#> $ Date Egg : Date[1:344], format: "2007-11-11" "2007-11-11" ...
#> $ Culmen Length (mm) : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
#> $ Culmen Depth (mm) : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
#> $ Flipper Length (mm): num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
#> $ Body Mass (g) : num [1:344] 3750 3800 3250 NA 3450 ...
#> $ Sex : chr [1:344] "MALE" "FEMALE" "FEMALE" NA ...
#> $ Delta 15 N (o/oo) : num [1:344] NA 8.95 8.37 NA 8.77 ...
#> $ Delta 13 C (o/oo) : num [1:344] NA -24.7 -25.3 NA -25.3 ...
#> $ Comments : chr [1:344] "Not enough blood for isotopes." NA NA "Adult not sampled." ...
3.9.2 Data Frames vs Tibbles
3.9.2.1 Printing
- Tip: print out these results in RStudio under different editor themes
# Base data frame: every column is printed; columns that do not fit the
# console width wrap onto a second block of output.
head(penguins_df)
#> species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
#> 1 Adelie Torgersen 39.1 18.7 181 3750
#> 2 Adelie Torgersen 39.5 17.4 186 3800
#> 3 Adelie Torgersen 40.3 18.0 195 3250
#> 4 Adelie Torgersen NA NA NA NA
#> 5 Adelie Torgersen 36.7 19.3 193 3450
#> 6 Adelie Torgersen 39.3 20.6 190 3650
#> sex year
#> 1 male 2007
#> 2 female 2007
#> 3 female 2007
#> 4 <NA> 2007
#> 5 female 2007
#> 6 male 2007
# Tibble: prints only the first 10 rows, shows each column's type under its
# name, and summarizes the rows and columns that were left out.
penguins_tb
#> # A tibble: 344 × 8
#> species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
#> <fct> <fct> <dbl> <dbl> <int> <int>
#> 1 Adelie Torgersen 39.1 18.7 181 3750
#> 2 Adelie Torgersen 39.5 17.4 186 3800
#> 3 Adelie Torgersen 40.3 18 195 3250
#> 4 Adelie Torgersen NA NA NA NA
#> 5 Adelie Torgersen 36.7 19.3 193 3450
#> 6 Adelie Torgersen 39.3 20.6 190 3650
#> 7 Adelie Torgersen 38.9 17.8 181 3625
#> 8 Adelie Torgersen 39.2 19.6 195 4675
#> 9 Adelie Torgersen 34.1 18.1 193 3475
#> 10 Adelie Torgersen 42 20.2 190 4250
#> # ℹ 334 more rows
#> # ℹ 2 more variables: sex <fct>, year <int>
3.9.3 Atomic Vectors
# Extract `species` from the base data frame in three ways: keep the
# one-column container, flatten it with unlist(), or pull() the column out.
species_vector_df <- select(penguins_df, species)
species_unlist_df <- unlist(species_vector_df)
species_pull_df <- pull(species_vector_df)

# Repeat the same three extractions starting from the tibble.
species_vector_tb <- select(penguins_tb, species)
species_unlist_tb <- unlist(species_vector_tb)
species_pull_tb <- pull(species_vector_tb)
typeof() and class()
# Data frame, select() only: the result is still a data frame, and a data
# frame's underlying type is a list.
typeof(species_vector_df)
#> [1] "list"
class(species_vector_df)
#> [1] "data.frame"
# Data frame, unlist() and pull(): both give a factor, which is stored as
# integer codes.
typeof(species_unlist_df)
#> [1] "integer"
class(species_unlist_df)
#> [1] "factor"
typeof(species_pull_df)
#> [1] "integer"
class(species_pull_df)
#> [1] "factor"
# Tibble, select() only: still a list underneath, but the tibble classes are
# kept in front of "data.frame".
typeof(species_vector_tb)
#> [1] "list"
class(species_vector_tb)
#> [1] "tbl_df" "tbl" "data.frame"
# Tibble, unlist() and pull(): same factor / integer result as the data frame.
typeof(species_unlist_tb)
#> [1] "integer"
class(species_unlist_tb)
#> [1] "factor"
typeof(species_pull_tb)
#> [1] "integer"
class(species_pull_tb)
#> [1] "factor"
3.9.4 Column Names
# List the full column names; the questions that follow ask what happens when
# only part of one of these names is used.
colnames(penguins_tb)
#> [1] "species" "island" "bill_length_mm"
#> [4] "bill_depth_mm" "flipper_length_mm" "body_mass_g"
#> [7] "sex" "year"
- What if we only invoke a partial name of a column of a tibble?

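Tibbles are surly: `$` never does partial matching on a tibble, so a partial column name returns `NULL` with a warning instead of guessing.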
- What if we only invoke a partial name of a column of a data frame?
- Is this evaluation in alphabetical order or column order?