9.5 - AMES Housing example
# Load the Ames housing data via the AmesHousing package
ames <- AmesHousing::make_ames()

# relocate `Sale_Price` so that the response is the first column
ames <- ames %>%
  relocate(Sale_Price, .before = everything())
# Stratified sampling with the rsample package:
# 70% of the rows go to training, stratified on `Sale_Price`.
set.seed(123) # for reproducibility
split <- initial_split(ames, prop = 0.7, strata = "Sale_Price")
ames_train <- training(split) # training portion of the split
ames_test <- testing(split) # held-out portion of the split
We can fit a regression tree using `rpart` and then visualize it using `rpart.plot`.
# Fit a regression tree for `Sale_Price` using every other column of the
# training set as a candidate predictor (`Sale_Price ~ .`).
ames_dt1 <- rpart(
  formula = Sale_Price ~ .,
  data = ames_train,
  method = "anova" # regression tree
)

# Print the fitted tree (node, split, n, deviance, yval)
ames_dt1
## n= 2049
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 2049 1.321981e+13 180922.60
## 2) Overall_Qual=Very_Poor,Poor,Fair,Below_Average,Average,Above_Average,Good 1701 4.109012e+12 155897.00
## 4) Neighborhood=North_Ames,Old_Town,Edwards,Sawyer,Mitchell,Brookside,Iowa_DOT_and_Rail_Road,South_and_West_of_Iowa_State_University,Meadow_Village,Briardale,Northpark_Villa,Blueste,Landmark 1023 1.370671e+12 131815.70
## 8) First_Flr_SF< 1089.5 661 5.643745e+11 119572.90
## 16) Overall_Qual=Very_Poor,Poor,Fair,Below_Average 149 1.016868e+11 90363.56 *
## 17) Overall_Qual=Average,Above_Average,Good 512 2.985678e+11 128073.30 *
## 9) First_Flr_SF>=1089.5 362 5.263151e+11 154170.60 *
## 5) Neighborhood=College_Creek,Somerset,Northridge_Heights,Gilbert,Northwest_Ames,Sawyer_West,Crawford,Timberland,Northridge,Stone_Brook,Clear_Creek,Bloomington_Heights,Veenker,Green_Hills 678 1.249971e+12 192232.10
## 10) Gr_Liv_Area< 1725.5 484 5.392924e+11 177594.80
## 20) Total_Bsmt_SF< 1295.5 342 2.299978e+11 166044.60 *
## 21) Total_Bsmt_SF>=1295.5 142 1.537824e+11 205413.00 *
## 11) Gr_Liv_Area>=1725.5 194 3.482733e+11 228749.90 *
## 3) Overall_Qual=Very_Good,Excellent,Very_Excellent 348 2.838371e+12 303245.90
## 6) Overall_Qual=Very_Good 242 9.801339e+11 271271.60
## 12) Gr_Liv_Area< 1920.5 143 2.792781e+11 240517.80 *
## 13) Gr_Liv_Area>=1920.5 99 3.702480e+11 315693.70 *
## 7) Overall_Qual=Excellent,Very_Excellent 106 1.045983e+12 376243.90
## 14) Gr_Liv_Area< 1956.5 47 8.921667e+10 324506.70 *
## 15) Gr_Liv_Area>=1956.5 59 7.307403e+11 417458.30
## 30) Neighborhood=Edwards,Somerset,Veenker 8 8.904433e+10 269794.20 *
## 31) Neighborhood=College_Creek,Old_Town,Northridge_Heights,Timberland,Northridge,Stone_Brook 51 4.398958e+11 440621.30
## 62) First_Flr_SF< 1829 28 9.425118e+10 391454.60 *
## 63) First_Flr_SF>=1829 23 1.955579e+11 500476.40 *