5.3 Charlie: external helpers
Move reusable data and logic out of the analysis script and into separate files.
Here is the content of `beach-lookup-table.csv`:
where,english
beach,US
coast,US
seashore,UK
seaside,UK
Here is the content of `cleaning-helpers.R`:
library(tidyverse)
# Left-join the lookup table stored in beach-lookup-table.csv onto `dat`.
#
# The CSV is read from the current working directory on every call, with both
# of its columns (`where`, `english`) parsed as character. No `by` is supplied,
# so dplyr joins on every column name the two tables share and reports the
# chosen key(s) in a message.
#
# dat: a data frame sharing at least one column name with the lookup table.
# Returns `dat` with the lookup table's remaining columns appended.
localize_beach <- function(dat) {
  lookup_spec <- cols(where = "c", english = "c")
  lookup_table <- read_csv("beach-lookup-table.csv", col_types = lookup_spec)
  dat %>% left_join(lookup_table)
}
# Convert temperatures from degrees Fahrenheit to degrees Celsius.
#
# x: a numeric vector of Fahrenheit values (the arithmetic is vectorized).
# Returns a numeric vector of the same length as `x`.
f_to_c <- function(x) {
  above_freezing <- x - 32
  above_freezing * 5 / 9
}
# Convert the `temp` column to Celsius for rows whose `english` is "US".
#
# Rows with any other non-missing `english` value keep `temp` unchanged. Rows
# where `english` is NA end up with NA in `temp`, because if_else() is called
# without a `missing` value.
#
# dat: a data frame with a numeric `temp` column and an `english` column.
# Returns `dat` with `temp` replaced.
celsify_temp <- function(dat) {
  dat %>%
    mutate(temp = if_else(english == "US", f_to_c(temp), temp))
}
# Captured once, when this file is sourced. outfile_path() reads this value,
# so every call made after a single source() reuses the same time.
now <- Sys.time()
# Format a date-time as "year-monthname-day_hour-minute-second".
#
# time: a date-time object accepted by format(), e.g. the result of Sys.time().
# Returns a character vector. `%B` is the full month name, which format()
# renders according to the current locale.
timestamp <- function(time) {
  format(time, "%Y-%B-%d_%H-%M-%S")
}
# Build the output file name for a given input file name.
#
# "_clean" is inserted before a trailing ".csv" (names that do not end in
# ".csv" are left unchanged), and the result is prefixed with the formatted
# value of the global `now` followed by "_".
#
# infile: character, the input file name.
# Returns a character vector of output file names.
outfile_path <- function(infile) {
  clean_name <- sub("(.*)([.]csv$)", "\\1_clean\\2", infile)
  paste0(timestamp(now), "_", clean_name)
}
- High-level helper functions, like `localize_beach()` and `celsify_temp()`, were added to the pre-existing helpers (`f_to_c()`, `timestamp()`, and `outfile_path()`).
library(tidyverse)
source("cleaning-helpers.R")
# Read the raw data, parsing `name` and `where` as character and `temp` as
# double.
infile <- "swim.csv"
dat <- read_csv(infile, col_types = cols(name = "c", where = "c", temp = "d"))

# Run both cleaning helpers in sequence, then print the result.
dat <- dat %>%
  localize_beach() %>%
  celsify_temp()
dat
#> Joining, by = "where"
#> # A tibble: 5 × 4
#> name where temp english
#> <chr> <chr> <dbl> <chr>
#> 1 Adam beach 35 US
#> 2 Bess coast 32.8 US
#> 3 Cora seashore 28 UK
#> 4 Dale beach 29.4 US
#> 5 Evan seaside 31 UK
# Write the cleaned data to the file name that outfile_path() derives from
# `infile`.
write_csv(dat, outfile_path(infile))
The script is now much shorter (and cleaner). However, whether it is easier to work with depends on personal preference and what “feels” more natural to you.
For more information, refer to the Tidyverse design guide.