# 9.5 - Ames Housing example

# Load the Ames housing data and move the response, `Sale_Price`, into the
# first column position.
ames <- AmesHousing::make_ames() %>%
  relocate(Sale_Price, .before = everything())

# Seed the RNG so the partition below is reproducible.
set.seed(123)

# 70/30 train/test partition via rsample, stratified on `Sale_Price`.
split <- initial_split(ames, prop = 0.7, strata = "Sale_Price")
ames_train <- training(split)
ames_test <- testing(split)

# We can fit a regression tree using `rpart()` and then visualize it using
# `rpart.plot()`.

# Fit a regression tree (method = "anova") for `Sale_Price` against every
# other column of the training set, then display the fitted tree.
ames_dt1 <- rpart(Sale_Price ~ ., data = ames_train, method = "anova")
ames_dt1
## n= 2049 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 2049 1.321981e+13 180922.60  
##    2) Overall_Qual=Very_Poor,Poor,Fair,Below_Average,Average,Above_Average,Good 1701 4.109012e+12 155897.00  
##      4) Neighborhood=North_Ames,Old_Town,Edwards,Sawyer,Mitchell,Brookside,Iowa_DOT_and_Rail_Road,South_and_West_of_Iowa_State_University,Meadow_Village,Briardale,Northpark_Villa,Blueste,Landmark 1023 1.370671e+12 131815.70  
##        8) First_Flr_SF< 1089.5 661 5.643745e+11 119572.90  
##         16) Overall_Qual=Very_Poor,Poor,Fair,Below_Average 149 1.016868e+11  90363.56 *
##         17) Overall_Qual=Average,Above_Average,Good 512 2.985678e+11 128073.30 *
##        9) First_Flr_SF>=1089.5 362 5.263151e+11 154170.60 *
##      5) Neighborhood=College_Creek,Somerset,Northridge_Heights,Gilbert,Northwest_Ames,Sawyer_West,Crawford,Timberland,Northridge,Stone_Brook,Clear_Creek,Bloomington_Heights,Veenker,Green_Hills 678 1.249971e+12 192232.10  
##       10) Gr_Liv_Area< 1725.5 484 5.392924e+11 177594.80  
##         20) Total_Bsmt_SF< 1295.5 342 2.299978e+11 166044.60 *
##         21) Total_Bsmt_SF>=1295.5 142 1.537824e+11 205413.00 *
##       11) Gr_Liv_Area>=1725.5 194 3.482733e+11 228749.90 *
##    3) Overall_Qual=Very_Good,Excellent,Very_Excellent 348 2.838371e+12 303245.90  
##      6) Overall_Qual=Very_Good 242 9.801339e+11 271271.60  
##       12) Gr_Liv_Area< 1920.5 143 2.792781e+11 240517.80 *
##       13) Gr_Liv_Area>=1920.5 99 3.702480e+11 315693.70 *
##      7) Overall_Qual=Excellent,Very_Excellent 106 1.045983e+12 376243.90  
##       14) Gr_Liv_Area< 1956.5 47 8.921667e+10 324506.70 *
##       15) Gr_Liv_Area>=1956.5 59 7.307403e+11 417458.30  
##         30) Neighborhood=Edwards,Somerset,Veenker 8 8.904433e+10 269794.20 *
##         31) Neighborhood=College_Creek,Old_Town,Northridge_Heights,Timberland,Northridge,Stone_Brook 51 4.398958e+11 440621.30  
##           62) First_Flr_SF< 1829 28 9.425118e+10 391454.60 *
##           63) First_Flr_SF>=1829 23 1.955579e+11 500476.40 *