18.3 Hierarchical Poisson & Negative Binomial regression

# Load data
data(airbnb)

# Number of listings
nrow(airbnb)

## [1] 1561

# Number of neighborhoods
airbnb %>% 
  summarize(nlevels(neighborhood))

##   nlevels(neighborhood)
## 1                    43

 # nlevels(neighborhood)

18.3.1 Model building & simulation

ggplot(airbnb, aes(x = reviews)) + 
  geom_histogram(color = "white", breaks = seq(0, 200, by = 10))

ggplot(airbnb, aes(y = reviews, x = rating)) + 
  geom_jitter()

ggplot(airbnb, aes(y = reviews, x = room_type)) + 
  geom_violin()

airbnb %>% 
  filter(neighborhood %in% 
           c("Albany Park", "East Garfield Park", "The Loop")) %>% 
  ggplot(aes(y = reviews, x = rating, color = room_type)) + 
    geom_jitter() + 
    facet_wrap(~ neighborhood)

airbnb_model_1 <- stan_glmer(
  reviews ~ rating + room_type + (1 | neighborhood), 
  data = airbnb, family = poisson,
  prior_intercept = normal(3, 2.5, autoscale = TRUE),
  prior = normal(0, 2.5, autoscale = TRUE), 
  prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1),
  chains = 4, iter = 5000*2, seed = 84735
)

pp_check(airbnb_model_1) + 
  xlim(0, 200) + 
  xlab("reviews")

airbnb_model_2 <- stan_glmer(
  reviews ~ rating + room_type + (1 | neighborhood), 
  data = airbnb, family = neg_binomial_2,
  prior_intercept = normal(3, 2.5, autoscale = TRUE),
  prior = normal(0, 2.5, autoscale = TRUE), 
  prior_aux = exponential(1, autoscale = TRUE),
  prior_covariance = decov(reg = 1, conc = 1, shape = 1, scale = 1),
  chains = 4, iter = 5000*2, seed = 84735
)

pp_check(airbnb_model_2) + 
  xlim(0, 200) + 
  xlab("reviews")

18.3.2 Posterior analysis

tidy(airbnb_model_2, effects = "fixed", conf.int = TRUE, conf.level = 0.80)

readRDS("data/ch18/airbnb2_tidy1.rds")

tidy(airbnb_model_2, effects = "ran_vals", 
     conf.int = TRUE, conf.level = 0.80) %>% 
  select(level, estimate, conf.low, conf.high) %>% 
  filter(level %in% c("Albany_Park", "East_Garfield_Park", "The_Loop"))

readRDS("data/ch18/airbnb2_tidy2.rds")

Posterior predictions of reviews

set.seed(84735)
predicted_reviews <- posterior_predict(
  airbnb_model_2, 
  newdata = data.frame(
    rating = rep(5, 3), 
    room_type = rep("Entire home/apt", 3), 
    neighborhood = c("Albany Park", "East Garfield Park", "The Loop")))

mcmc_areas(predicted_reviews, prob = 0.8) +
  ggplot2::scale_y_discrete(
    labels = c("Albany Park", "East Garfield Park", "The Loop")) + 
  xlim(0, 150) + 
  xlab("reviews")

## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

## Warning: Removed 3 rows containing missing values (`geom_segment()`).

18.3.3 Model evaluation

set.seed(84735)
pred <- prediction_summary(model = airbnb_model_2, data = airbnb)

pred

##        mae mae_scaled within_50 within_95
## 1 17.66115  0.6740938 0.5188981 0.9577194