14.2 Tidy input data
The German government provides gridded census data at either 1 km or 100 m resolution. The following code chunk downloads, unzips and reads in the 1 km data.
# Download the zipped census grid; mode = "wb" writes the archive in binary
# mode. The archive is then extracted into the current working directory.
download.file("https://tinyurl.com/ybtpkwxz",
  destfile = "census.zip", mode = "wb"
)
unzip("census.zip") # unzip the files

# Read the extracted 1 km grid file (semicolon-delimited, comma as decimal
# mark, hence read_csv2), keep the cell coordinates plus the four variables of
# interest under short English names, and recode the values -1 and -9 to NA.
# NOTE(review): -1 / -9 are presumably the provider's codes for
# unknown/suppressed values -- confirm against the data documentation.
# NOTE(review): list.files() is expected to match exactly one file here.
input_tidy <- readr::read_csv2(list.files(pattern = "Gitter.csv")) |>
  dplyr::select(
    x = x_mp_1km,
    y = y_mp_1km,
    pop = Einwohner,
    women = Frauen_A,
    mean_age = Alter_D,
    hh_size = HHGroesse_D
  ) |>
  dplyr::mutate(dplyr::across(
    .cols = c(pop, women, mean_age, hh_size),
    .fns = ~ ifelse(.x %in% c(-1, -9), NA, .x)
  ))
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 361478 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Gitter_ID_1km
## dbl (12): x_mp_1km, y_mp_1km, Einwohner, Frauen_A, Alter_D, unter18_A, ab65_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(input_tidy)
## x y pop women
## Min. :4031500 Min. :2684500 Min. :1.00 Min. :1.0
## 1st Qu.:4230500 1st Qu.:2932500 1st Qu.:1.00 1st Qu.:2.0
## Median :4345500 Median :3117500 Median :1.00 Median :3.0
## Mean :4347113 Mean :3106078 Mean :1.49 Mean :2.9
## 3rd Qu.:4466500 3rd Qu.:3274500 3rd Qu.:2.00 3rd Qu.:3.0
## Max. :4672500 Max. :3551500 Max. :6.00 Max. :5.0
## NA's :146845 NA's :150933
## mean_age hh_size
## Min. :1.00 Min. :1.00
## 1st Qu.:2.00 1st Qu.:2.00
## Median :3.00 Median :2.00
## Mean :3.06 Mean :2.55
## 3rd Qu.:4.00 3rd Qu.:3.00
## Max. :5.00 Max. :5.00
## NA's :151064 NA's :161755