Student graph examples from bonus exercise


Authors: Andrea Geraci, Joost Jonker, Friso Schutte, Lisa Verlare

Data Set: txhousing

library(harrypotter)

# Restrict the housing data to the three cities compared in the figure.
txhousing_1 <- txhousing %>%
  filter(city %in% c("Austin", "Dallas", "San Antonio"))

# Monthly sales per city as one line each, with a shaded band and a text
# label drawn on top of the lines between 2002 and mid-2006.
ggplot(txhousing_1, aes(x = date, y = sales, color = city)) +
  geom_line() +
  annotate(
    geom = "rect",
    xmin = 2002, xmax = 2006.5,
    ymin = 0, ymax = 7500,
    fill = "red", alpha = 0.15
  ) +
  annotate(
    geom = "text",
    x = 2004.3, y = 7000,
    size = 3.5,
    label = "U.S. Housing \n Bubble"
  ) +
  scale_color_hp_d(option = "Ravenclaw") +
  labs(
    title = "Housing Market in U.S., Texas in 2000-2015",
    subtitle = "Review of the Largest Populated Cities",
    x = "Date",
    y = "Number of Sales",
    color = "City"
  ) +
  theme_minimal()


Authors: Sevim, Alex, Fabian, Muharrem

Data Set: midwest

library(ggrepel)

# Scatter plot of the share of people below the poverty line against the share
# of college-educated people, one point per row of `midwest`. Point size
# encodes `percblack`, point colour encodes `inmetro`.
ggplot(
  data = midwest,
  mapping = aes(x = percollege, y = percbelowpoverty)
) +
  geom_point(
    mapping = aes(size = percblack, color = as.factor(inmetro)),
    alpha = 0.4
  ) +
  labs(
    x = "Percentage college educated",
    y = "Percentage living below poverty",
    color = "Living in metro area",
    size = "percentage of black people",
    # Fixed the "againt" typo that was rendered in the plot title.
    title = "Poverty rates against percentage college educated",
    # Explicit "\n" instead of a raw line break inside the literal, which
    # leaked the source indentation into the rendered subtitle.
    subtitle = "Special attention for counties with high poverty\nrates and outliers"
  ) +
  theme_minimal() +
  # Grey band over the region with more than 25% below poverty.
  annotate(
    geom = "rect",
    xmin = 0, xmax = 50, ymin = 25, ymax = 50,
    fill = "grey", alpha = 0.3
  ) +
  annotate(
    geom = "text",
    x = 25, y = 40,
    label = "Counties with over 25% below poverty"
  ) +
  # Name only the counties that combine high poverty with a high share of
  # college graduates.
  # NOTE(review): the label cut-off is 24 while the shaded band starts at 25 -
  # confirm the difference is intended.
  geom_text_repel(
    data = subset(midwest, percbelowpoverty > 24 & percollege > 20),
    mapping = aes(label = county),
    size = 2
  )


Authors: Kimberly Wu, Desislava Rizova, Caro Evers

Data Set: diamonds

# Histogram of diamond prices on a log10 x-axis; bars are outlined in black
# and filled by cut.
ggplot(data = diamonds, aes(price, fill = cut)) +
  scale_x_log10() +
  geom_histogram(colour = "black") +
  labs(
    x = "Price",
    y = "Count",
    title = "Number of Cut According to Price"
  )


Authors: Alexander Jacobs, Jacob Hemming, Dizzy van Duijn

Data Set: midwest

library(prettydoc)
library(maps)
library(usmap)
library(RColorBrewer)

# First, fetch state and county outlines with map_data() (which reads the
# polygons from the maps package) and keep only the five states that the
# midwest dataset covers.
midwest_states <- c("indiana", "ohio", "illinois", "wisconsin", "michigan")

state <- map_data("state")
usa_counties <- map_data("county")

midwest1 <- subset(state, region %in% midwest_states)
midwest_counties <- subset(usa_counties, region %in% midwest_states)

# Prepare the join keys so the county polygons can be combined with the
# midwest data: upper-case county names, plus the two-letter state
# abbreviation derived from the lower-case state name in `region`.
names(midwest_counties)[names(midwest_counties) == "subregion"] <- "county"
midwest_counties$county <- toupper(midwest_counties$county)
midwest_counties$state <- state.abb[
  match(midwest_counties$region, tolower(state.name))
]

# Join on county AND state. County names repeat across states, so joining on
# the county name alone matches every polygon vertex to the same-named county
# in every state, duplicating vertices and attaching the wrong statistics.
total <- inner_join(
  midwest_counties,
  ggplot2::midwest,
  by = c("county", "state")
)

# Build one county-level choropleth on top of the state outlines.
#
# states     data frame of state polygons (long, lat, group)
# counties   data frame of county polygons joined with the statistics
# fill_aes   aes() mapping that selects the column used for the county fill
# title, subtitle, fill_label   text for the plot title, subtitle and legend
#
# Returns the ggplot object; the fill scale is added by the caller.
draw_midwest_map <- function(states, counties, fill_aes,
                             title, subtitle, fill_label) {
  hidden <- element_blank()

  ggplot(data = states, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    # Grey state background, so counties without data remain visible.
    geom_polygon(color = "black", fill = "gray") +
    geom_polygon(data = counties, mapping = fill_aes, color = "white") +
    # State borders are redrawn last so they sit on top of the county fills.
    geom_polygon(color = "black", fill = NA) +
    theme(
      axis.title.x = hidden, axis.text.x = hidden, axis.ticks.x = hidden,
      axis.title.y = hidden, axis.text.y = hidden, axis.ticks.y = hidden,
      legend.title = element_text(size = 8)
    ) +
    # The earlier ggtitle() call was dropped: labs(title = ) replaced it anyway.
    labs(title = title, subtitle = subtitle, fill = fill_label)
}

# Map the states we have data on, filled by the percentage of college educated.
midwest_map1 <- draw_midwest_map(
  midwest1, total,
  fill_aes = aes(fill = percollege),
  title = "Percent College Educated in Midwest Counties, USA",
  subtitle = "Includes the States Indiana, Ohio, Illinois, Wisconsin and Michigan ",
  fill_label = "% of population \ncollege educated"
)
midwest_map1 + scale_fill_continuous(type = "viridis", trans = "log10")

# Same map, filled by population density.
midwest_map2 <- draw_midwest_map(
  midwest1, total,
  fill_aes = aes(fill = popdensity),
  title = "Population Density in Midwest Counties",
  subtitle = "Includes the States Indiana, Ohio, Illinois, Wisconsin and Michigan in the USA ",
  fill_label = "density (people per square mile)"
)
midwest_map2 + scale_fill_viridis_c(option = "magma", trans = "log10")

# Same map, filled by the number of adults.
midwest_map3 <- draw_midwest_map(
  midwest1, total,
  fill_aes = aes(fill = popadults),
  title = "Number of Adults in Midwest Counties",
  subtitle = "Includes the States Indiana, Ohio, Illinois, Wisconsin and Michigan in the USA ",
  fill_label = "Number of adults"
)
midwest_map3 + scale_fill_viridis_c(option = "magma", trans = "log10")


Authors: Victoria Barocsai and Sterre Rood

Data Set: mpg

# City fuel economy against engine displacement, coloured by cylinder count,
# with one panel per vehicle class.
ggplot(mpg, aes(x = displ, y = cty, color = cyl)) +
  geom_point() +
  # Faint horizontal reference line at 20 city miles per gallon.
  geom_hline(yintercept = 20, alpha = 0.2) +
  facet_wrap(vars(class)) +
  labs(
    title = "Engine displacement in litres compared to the city miles per gallon \nand the number of cylinders",
    subtitle = "separated for each car type",
    x = "Engine displacement in litres",
    y = "city miles per gallon",
    color = "number of cylinders"
  ) +
  theme_minimal()


Authors: Guido Ammann, Adam Mehdi Arafan, Dennis Vriend

Data Set: presidential

# Add a column with the time in office of each president, in days. The value
# is stored as a plain number (not a difftime) so ggplot2 can use an ordinary
# continuous x scale without guessing.
presidential$duration <- as.numeric(
  as.Date(presidential$end) - as.Date(presidential$start),
  units = "days"
)

# Sort chronologically by the start of the term.
presidential <- presidential[order(presidential$start), ]

# Both Bush presidencies carry the same name, which would merge them into a
# single bar. Find them by name rather than by hard-coded row numbers; after
# the sort above, the first match is the earlier (senior) presidency.
bush_rows <- which(presidential$name == "Bush")
if (length(bush_rows) == 2L) {
  presidential$name[bush_rows] <- c("Bush Senior (H.W)", "Bush Junior (W.)")
}

# Number of years spanned by the data, used in the title so that it always
# matches the rows that are actually plotted.
years_covered <- round(
  as.numeric(
    as.Date(max(presidential$end)) - as.Date(min(presidential$start)),
    units = "days"
  ) / 365.25
)

# One horizontal bar per president. reorder() sorts the names on the y axis by
# the start of the term. The fill colours are named by party, so blue goes to
# the Democratic party and red to the Republican party regardless of the order
# of the factor levels.
plot <- ggplot(
  presidential,
  aes(x = duration, y = reorder(name, start), fill = party)
) +
  geom_col() +
  scale_fill_manual(
    values = c(Democratic = "#0066FFFF", Republican = "#FF0000FF")
  )

plot + labs(
  title = paste(years_covered, "years of US presidents"),
  x = "Time in office (in days)",
  y = "President in Office",
  fill = "President's Party"
)