4.14 Mean/SEM plots

# calculate means, standard deviations,
# standard errors, and 95% confidence 
# intervals by rank
library(dplyr)
plotdata <- Salaries %>%
  group_by(rank) %>%
  summarize(n = n(),
         mean = mean(salary),
         sd = sd(salary),
         se = sd / sqrt(n),
         ci = qt(0.975, df = n - 1) * sd / 
        sqrt(n))





# plot the means and standard errors
ggplot(plotdata, 
       aes(x = rank, 
           y = mean, 
           group = 1)) +
  geom_point(size = 3) +
  geom_line() +
  geom_errorbar(aes(ymin = mean - se, 
                    ymax = mean + se), 
                width = .1)

# calculate means and standard errors by rank and sex
plotdata <- Salaries %>%
  group_by(rank, sex) %>%
  summarize(n = n(),
            mean = mean(salary),
            sd = sd(salary),
            se = sd/sqrt(n))
## `summarise()` has grouped output by 'rank'. You can override using the
## `.groups` argument.
# plot the means and standard errors by sex
ggplot(plotdata, aes(x = rank,
                     y = mean, 
                     group=sex, 
                     color=sex)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  geom_errorbar(aes(ymin  =mean - se, 
                    ymax = mean+se), 
                width = .1)

  • Unfortunately, the error bars overlap. We can dodge the horizontal positions a bit to overcome this.
# plot the means and standard errors by sex (dodged)
pd <- position_dodge(0.2)
ggplot(plotdata, 
       aes(x = factor(rank, 
                      labels = c("Assistant\nProfessor",
                                 "Associate\nProfessor",
                                 "Full\nProfessor")), 
           y = mean, 
           group=sex, 
           color=sex)) +
  geom_point(position=pd, 
             size = 3) +
  geom_line(position = pd, 
            size = 1) +
  geom_errorbar(aes(ymin = mean - se, 
                    ymax = mean + se), 
                width = .1, 
                position = pd, 
                size = 1) +
  scale_y_continuous(label = scales::dollar) +
  scale_color_brewer(palette="Set1") +
  theme_minimal() +
  labs(title = "Mean salary by rank and sex",
       subtitle = "(mean +/- standard error)",
       x = "", 
       y = "",
       color = "Gender")