# A SIMPLE RECIPE FOR THE AMES HOUSING DATA
# A Recommended preprocessing
library(tidyverse)
library(tidymodels)
tidymodels_prefer()
# Baseline ordinary least-squares fit on the full `ames` data: Sale_Price is
# modelled from neighborhood, log10 of the living area, construction year and
# building type.
fit <- lm(
  Sale_Price ~ Neighborhood + log10(Gr_Liv_Area) + Year_Built + Bldg_Type,
  data = ames
)

# Show the first three coefficient rows of the tidied model summary.
fit %>%
  broom::tidy() %>%
  head(3)
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2320382. 101509. -22.9 1.84e-106
## 2 NeighborhoodCollege_Creek 6361. 3602. 1.77 7.75e- 2
## 3 NeighborhoodOld_Town 1485. 3673. 0.404 6.86e- 1
# Fix the RNG seed so the random partition below is reproducible.
set.seed(1234)

# Partition `ames` into training and testing sets, stratifying on Sale_Price.
# The split proportion is left at initial_split()'s default.
split <- ames %>%
  initial_split(strata = Sale_Price)

# Materialise the two partitions as separate data frames.
ames_train <- training(split)
ames_test <- testing(split)
# Preprocessing recipe declared against the training set only:
#   1. log10-transform Gr_Liv_Area;
#   2. convert every nominal predictor into dummy variables.
simple_ames <-
  recipe(
    Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type,
    data = ames_train
  ) %>%
  step_log(Gr_Liv_Area, base = 10) %>%
  step_dummy(all_nominal_predictors())

# Print the recipe specification.
simple_ames