8.2 A SIMPLE RECIPE FOR THE AMES HOUSING DATA

Appendix A: Recommended preprocessing

Model preprocessing steps

Figure 8.1: Model preprocessing steps

# Attach the packages used by the rest of this script.
library(tidyverse)
library(tidymodels)
# NOTE(review): presumably resolves function-name conflicts in favour of
# tidymodels packages -- confirm against the tidymodels documentation.
tidymodels_prefer()
# Load the `ames` data set used by the model fit, split and recipe below.
data(ames)
# Linear regression of sale price on neighborhood, log10 of the living
# area, year built and building type, fitted on the full `ames` data.
fit <- lm(
  Sale_Price ~ Neighborhood + log10(Gr_Liv_Area) + Year_Built + Bldg_Type,
  data = ames
)

# Preview the first three rows of the tidied coefficient table.
fit %>%
  broom::tidy() %>%
  head(n = 3)
## # A tibble: 3 × 5
##   term                       estimate std.error statistic   p.value
##   <chr>                         <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)               -2320382.   101509.   -22.9   1.84e-106
## 2 NeighborhoodCollege_Creek     6361.     3602.     1.77  7.75e-  2
## 3 NeighborhoodOld_Town          1485.     3673.     0.404 6.86e-  1
# Seed the RNG so the random partition below is reproducible.
set.seed(1234)

# Partition `ames` into training and testing sets, stratifying on
# Sale_Price; the split proportion is left at initial_split()'s default.
split <- ames %>%
  initial_split(strata = Sale_Price)
ames_train <- training(split)
ames_test <- testing(split)

# Recipe for the training data: declare the outcome and the four predictors.
simple_ames <- recipe(
  Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type,
  data = ames_train
)
# Log-transform the living area (base 10).
simple_ames <- step_log(simple_ames, Gr_Liv_Area, base = 10)
# Convert every nominal predictor into dummy variables.
simple_ames <- step_dummy(simple_ames, all_nominal_predictors())

# Print the recipe specification.
simple_ames