Keys

Surrogate key: a single variable added to stand in for a compound primary key, so that each row can be identified (and referred to) by one value instead of a combination of several.

# Check whether (time_hour, carrier, flight) can act as a compound primary
# key: tally the rows for every combination of the three columns, then keep
# only combinations that occur more than once. An empty result means no
# combination is duplicated.
local({
  rows_per_key <- count(flights, time_hour, carrier, flight)
  filter(rows_per_key, n > 1)
})
## # A tibble: 0 × 4
## # ℹ 4 variables: time_hour <dttm>, carrier <chr>, flight <int>, n <int>
# Add a surrogate key: number the rows in their current order and insert the
# new `id` column at the first position, so a single value identifies a row.
flights2 <- mutate(flights, id = row_number(), .before = 1)
flights2
## # A tibble: 336,776 × 20
##      id  year month   day dep_time sched_dep_time dep_delay arr_time
##   <int> <int> <int> <int>    <int>          <int>     <dbl>    <int>
## 1     1  2013     1     1      517            515         2      830
## 2     2  2013     1     1      533            529         4      850
## 3     3  2013     1     1      542            540         2      923
## 4     4  2013     1     1      544            545        -1     1004
## # ℹ 336,772 more rows
## # ℹ 12 more variables: sched_arr_time <int>, arr_delay <dbl>, carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>