The plots below are based on the same data and convey the same message. The first plot was made during the exploratory analysis, and the second plot was made to communicate the results. The exploratory plot took two lines of code, while the communicatory plot took ~30 lines of code.
library(tidyverse)
# Load bikes_2011.csv into `hour`.
# The original author's local copy is used when it exists on this machine;
# otherwise the same file is read from the public GitHub URL so the script
# still runs elsewhere.
bikes_local <- '/Users/iaincarmichael/Dropbox/stor390/data/bikes_2011.csv'
bikes_url <- 'https://raw.githubusercontent.com/idc9/stor390/master/data/bikes_2011.csv'
hour <- read_csv(if (file.exists(bikes_local)) bikes_local else bikes_url)
# Exploratory plot: one point per row of `hour`, count against hour of day,
# coloured by the working-day flag (treated as a discrete factor).
ggplot(data = hour, mapping = aes(x = hr, y = cnt, color = factor(workingday))) +
  geom_point()
# Median count for every (workingday, hr) combination.
# `workingday` is recoded from its 1 / other coding to 'yes' / 'no' so the
# plot legend reads naturally.
by_hour <- hour %>%
  mutate(workingday = ifelse(workingday == 1, 'yes', 'no')) %>%
  group_by(workingday, hr) %>%
  summarise(cnt = median(cnt)) %>%
  # summarise() only peels off the last grouping variable; drop the remaining
  # `workingday` grouping so later verbs operate on the whole table.
  ungroup()
# Working-day median counts at hours 8 and 17; these are the end points of the
# annotation arrows in the communicatory plot.
morning_peak <- by_hour %>%
  filter(hr == 8, workingday == 'yes') %>%
  pull(cnt)
afternoon_peak <- by_hour %>%
  filter(hr == 17, workingday == 'yes') %>%
  pull(cnt)
# Communicatory plot: median count by hour, one line per working-day status,
# with the two working-day peaks labelled as "rush hour".
# Relies on `by_hour`, `morning_peak` and `afternoon_peak` being defined.
ggplot(by_hour) +
  # geom_point(aes(x=hr, y=cnt, color=workingday, shape=workingday), size=3) + # plot points
  geom_line(aes(x = hr, y = cnt, color = workingday, linetype = workingday),
            size = 1) +
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept here because the installed version is unknown.
  theme(panel.background = element_blank(), # background/axis
        axis.line = element_line(colour = "black", size = .1),
        legend.key = element_rect(fill = "white")) +
  scale_x_continuous(breaks = c(6, 12, 18, 23), # axis
                     limits = c(0, 23),
                     expand = c(0, 1)) +
  scale_y_continuous(breaks = c(0, 200, 400),
                     limits = c(0, 450),
                     expand = c(0, 0)) +
  labs(x = 'Hour',
       y = 'Rider Count (median)',
       title = 'Rental patterns differ between working vs. non-working days') +
  # Give every legend the same title so colour and linetype merge into one
  # legend (shape is listed for the commented-out point layer above).
  guides(color = guide_legend(title = "Working Day"),
         shape = guide_legend(title = "Working Day"),
         linetype = guide_legend(title = "Working Day")) +
  # annotate() draws each annotation exactly once. geom_text()/geom_segment()
  # with constant aesthetics would inherit `by_hour` and redraw the same label
  # and arrows once per data row.
  annotate("text", x = 12, y = 430, label = 'rush hour') +
  annotate("segment", x = 10.3, y = 427, xend = 8.5, yend = morning_peak,
           size = .1, arrow = arrow(length = unit(0.03, 'npc'))) +
  annotate("segment", x = 13.5, y = 427, xend = 16.5, yend = afternoon_peak,
           size = .1, arrow = arrow(length = unit(0.03, 'npc')))