# Calculate means, standard deviations, standard errors, and 95% confidence
# intervals of salary by rank.
# NOTE(review): assumes `Salaries` (with `rank` and `salary` columns) and
# ggplot2 are already available in the session -- confirm against earlier code.
library(dplyr)

plotdata <- Salaries %>%
  group_by(rank) %>%
  summarize(
    n = n(),
    mean = mean(salary),
    sd = sd(salary),
    se = sd / sqrt(n),
    # half-width of the 95% t-based confidence interval (n - 1 df)
    ci = qt(0.975, df = n - 1) * sd / sqrt(n)
  )

# Plot the means and standard errors.
# `group = 1` places every row in one group so geom_line() draws a single
# line across the ranks.
ggplot(plotdata, aes(x = rank, y = mean, group = 1)) +
  geom_point(size = 3) +
  geom_line() +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1)
# Calculate means and standard errors of salary by rank and sex.
# No `.groups` argument is given, so summarize() prints the grouped-output
# message shown below and the result stays grouped by rank.
plotdata <- Salaries %>%
  group_by(rank, sex) %>%
  summarize(
    n = n(),
    mean = mean(salary),
    sd = sd(salary),
    se = sd / sqrt(n)
  )
## `summarise()` has grouped output by 'rank'. You can override using the
## `.groups` argument.
# Plot the means and standard errors by sex: one colored line per sex, with
# error bars of +/- one standard error around each mean.
# NOTE(review): on ggplot2 >= 3.4.0, `size` for lines is deprecated in favor
# of `linewidth`; kept as `size` here because the installed version is not
# visible from this file -- confirm before switching.
ggplot(plotdata, aes(x = rank, y = mean, group = sex, color = sex)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1)
Unfortunately, the error bars overlap. We can dodge the horizontal positions a bit to overcome this.
# Plot the means and standard errors by sex (dodged).
# The same position_dodge object is passed to the points, lines, and error
# bars so all three layers are shifted horizontally by the same amount.
pd <- position_dodge(0.2)

# NOTE(review): factor(rank, labels = ...) relabels the levels positionally,
# so it assumes the levels of `rank` are ordered assistant, associate, full
# professor -- confirm against the data.
# NOTE(review): on ggplot2 >= 3.4.0, `size` for lines and error bars is
# deprecated in favor of `linewidth`; kept as `size` because the installed
# version is not visible from this file.
ggplot(
  plotdata,
  aes(
    x = factor(
      rank,
      labels = c(
        "Assistant\nProfessor",
        "Associate\nProfessor",
        "Full\nProfessor"
      )
    ),
    y = mean,
    group = sex,
    color = sex
  )
) +
  geom_point(position = pd, size = 3) +
  geom_line(position = pd, size = 1) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.1,
    position = pd,
    size = 1
  ) +
  # `labels` spelled out in full instead of relying on partial matching
  scale_y_continuous(labels = scales::dollar) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    title = "Mean salary by rank and sex",
    subtitle = "(mean +/- standard error)",
    x = "",
    y = "",
    color = "Gender"
  )