Topic 6 Graphics in R (Part-II)
6.1 Interactive Plots using Plotly
- Here we will use the COVID-19 data provided by Johns Hopkins University
library(ggplot2)
library(maps)
library(ggthemes)
library(plotly)
library(scales)
library(dplyr)
library(tidyr)
# Download the global confirmed-cases time series as a wide table.
# check.names = FALSE stops read.csv() from rewriting the column headers into
# syntactic R names, so the per-date headers stay parseable as dates later on.
d1 = read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    check.names = FALSE)
# head(d1)
- The data is in wide format; let’s convert it to long format for visualisation
# Rename the Province/State and Country columns (the first two columns)
colnames(d1)[1:2] = c("State", "Country")
# Reshape wide -> long: every column other than the four identifier columns
# becomes one (Date, Cases) row
d1.2 = pivot_longer(d1, cols = -c(State, Country, Lat, Long), names_to = "Date",
    values_to = "Cases")
# Convert the Date strings (month/day/two-digit year) to Date objects
d1.2$Date = as.Date(d1.2$Date, format = "%m/%e/%y")
- Aggregate cases by day (dropping the state)
# Sum the cases over every (Lat, Long, Country, Date) combination; State is
# not part of the grouping, so it is dropped from the result
d2 = aggregate(d1.2$Cases, by = list(Lat = d1.2$Lat, Long = d1.2$Long, Country = d1.2$Country,
    Date = d1.2$Date), FUN = sum)
# aggregate() names the summed column "x"; give it a meaningful name
colnames(d2)[5] = "Cases"
# Reorder the columns to Date, Lat, Long, Country, Cases
d2 = d2[, c(4, 1, 2, 3, 5)]
- Let’s find top 10 by case numbers
- Using aggregate to find sum by country to find top 10
# Total cases per country per day (the summed column is named "x")
top1 = aggregate(d2$Cases, by = list(Date = d2$Date, Country = d2$Country), FUN = sum)
# Select a single date (11 Aug 2021) to get the overall total per country
top10 = top1[top1$Date == "2021-08-11", ]
# Sort by descending total and keep the top 10 rows
top10 = top10[order(-top10$x), ][1:10, ]
# Let's include Australia alongside the top 10 countries
top10_country = c(top10$Country, "Australia")
- Use ggplot to create a line chart (Figure 6.1)
# Rename the aggregated column from "x" to "Cases"
colnames(top1)[3] = "Cases"
# Keep only the rows for the selected countries
data_p = top1[top1$Country %in% c(as.character(top10_country)), ]

# Line chart of log(Cases) over time, one line per country, with date breaks
# every 2 months and the Wall Street Journal theme from ggthemes
p1 = ggplot(data = data_p, aes(Date, log(Cases), color = Country, group = Country)) +
    geom_line(stat = "identity", size = 1) +
    scale_x_date(labels = date_format("%m/%y"), breaks = "2 months") +
    theme_wsj()
p1
data:image/s3,"s3://crabby-images/5705e/5705ea9ea21aea4485625752f38f0e8b9c2d158a" alt="Line Chart with Custom Theme"
Figure 6.1: Line Chart with Custom Theme
- Create a custom color vector and a line chart with a basic theme to convert to plotly (Figure 6.2)
# Custom palette; scale_colour_manual() takes as many colours as there are
# countries, in order
myCol2 = c("slateblue1", "purple3", "turquoise2", "skyblue", "steelblue", "blue2",
    "navyblue", "orange", "tomato", "coral2", "palevioletred", "violetred", "red2",
    "springgreen2", "yellowgreen", "palegreen4", "wheat2", "tan", "tan2", "tan3",
    "brown", "grey70", "grey50", "grey30")

# Line + point chart of raw case counts with a basic theme.
# - The legend title is passed as `name =` explicitly; an unnamed string after
#   `values` relies on positional matching inside ggplot2.
# - geom_text() labels each line at the last date, placed 4 days to the right.
# - The x axis is extended by 5 days so the labels have room.
p2 = ggplot(data_p, aes(Date, Cases, group = Country, color = Country)) +
    geom_line(size = 1.5) +
    geom_point(size = 1.5) +
    scale_colour_manual(values = myCol2, name = "Countries") +
    geom_text(data = data_p[data_p$Date == max(data_p$Date), ],
        aes(x = as.Date(max(data_p$Date) + 4), label = Country),
        hjust = -0.01, nudge_y = 0.01, show.legend = FALSE) +
    expand_limits(x = as.Date(c(min(data_p$Date), max(data_p$Date) + 5))) +
    scale_x_date(breaks = seq(as.Date(min(data_p$Date)),
        as.Date(max(data_p$Date) + 5), by = "30 days"), date_labels = "%m/%y") +
    scale_y_continuous(labels = comma) +
    theme_classic() +
    theme(axis.title = element_text(size = 15, face = "bold"))
p2
data:image/s3,"s3://crabby-images/9d8cb/9d8cb214b04589383451eaa2aec467973b6bc775" alt="Line Chart"
Figure 6.2: Line Chart
- Convert to plotly for interactive graphics (Figure 6.3)
# Convert the static ggplot into an interactive plotly figure and display it
fig_p2 = ggplotly(p2)
fig_p2
Figure 6.3: Interactive Line Chart
6.2 Animation using gganimate
- We can also use the gganimate package to convert the image into a gif.
library(gganimate)
# Reveal the lines progressively along the Date axis
p1.anim = p1 + transition_reveal(Date)
# Render the animation at 10 fps, 800 x 1000, pausing at the start and end
# and without rewinding
anim_p1 = animate(p1.anim, fps = 10, start_pause = 2, end_pause = 5, rewind = FALSE,
    width = 800, height = 1000)
# Write the rendered animation to a GIF file
anim_save(filename = "covid_cases_log_2021aug.gif", animation = anim_p1)
data:image/s3,"s3://crabby-images/5613f/5613f97da6d62201ec3b858411e96986bdc8a161" alt="Animated Graph"
Figure 6.4: Animated Graph
- We can also use further customisations to visualise other information on the plot
- The following example uses data from the 2021 COVID-19 spread in Australia
- The plot shows the number of Covid-19 cases along with the Date in the animation
- The data is the last 3 months data (accessed: 30-08-2021), downloaded from https://www.covid19data.com.au/states-and-territories
- The code below imports the data and selects NSW, VIC and ACT, then creates EMA and SMA using last 7 Days of data.
library(tidyverse) #using tidyverse here to easily create grouped statistics
d_au_3m = read.csv("data/Last 3 months.csv")
# the date column name doesn't look ok so let's rename it
colnames(d_au_3m)[1] = "Date"
# convert to date type
d_au_3m$Date = as.Date(d_au_3m$Date, format = "%d/%m/%y")
# keep Date plus columns 2, 3 and 9
# (presumably the NSW, VIC and ACT columns -- verify against the CSV header)
d_au_3m = d_au_3m[, c(1:3, 9)]
# reshape to long format: one row per Date/State pair
data_p2 = d_au_3m %>%
    pivot_longer(cols = -Date, names_to = "State", values_to = "Cases")
# 7-observation exponential moving average, computed within each State
rolling1 = data_p2 %>%
    arrange(Date, State, Cases) %>%
    group_by(State) %>%
    mutate(EMA = TTR::EMA(Cases, n = 7))  #add EMA
# 7-observation simple moving average, computed within each State
rolling1 = rolling1 %>%
    arrange(Date, State, Cases) %>%
    group_by(State) %>%
    mutate(SMA = TTR::SMA(Cases, n = 7))  #add SMA
rolling1 = rolling1[rolling1$Date > as.Date("2021-07-15"), ]  #selecting from 15 July 2021
# fix the factor level order to NSW, VIC, ACT
rolling1$State = factor(rolling1$State, levels = c("NSW", "VIC", "ACT"))
- Create plots then animate
- Plot
- The first layer of the plot has points for the daily cases and lines for the EMA (the EMA can be replaced with the SMA, or both can be plotted)
- The line has an arrow at the end
- The scale is changed to display the dates better and some changes are made to theme elements
- Animation
- A transition_manual is used to access current_frame, cumulative=TRUE keeps the previous data
- The subtitle uses HTML elements rendered by ggtext to colour the group labels
- Notice the use of element_markdown() in the ggplot
library(ggthemes)
library(ggtext)
# Base plot: daily case counts as points, 7-day EMA as a path with a closed
# arrow head at its last point
p2 = ggplot(rolling1, aes(Date, Cases)) +
    geom_point(alpha = 0.7, aes(color = State, group = seq_along(Date), frame = Cases)) +
    geom_path(aes(y = EMA, color = State),
        arrow = arrow(ends = "last", type = "closed", length = unit(0.05, "inches")),
        size = 1.05, show.legend = FALSE)

# Axis and theme tweaks: date breaks every 5 days, no legend, small bold axis
# text. plot.subtitle = element_markdown() lets the subtitle render HTML.
p3 = p2 +
    scale_x_date(breaks = c(seq(min(rolling1$Date), max(rolling1$Date), by = "5 days")),
        date_labels = "%d/%m") +
    scale_color_discrete(name = "") +
    labs(x = "Date", y = "Cases") +
    theme_minimal() +
    theme(title = element_text(face = "bold"), legend.position = "none",
        axis.title = element_text(size = 8), strip.text.x = element_text(face = "bold"),
        axis.text.x = element_text(size = 5, face = "bold"),
        axis.text.y = element_text(size = 6, face = "bold"),
        legend.title = element_text(size = 10), plot.subtitle = element_markdown()) +
    labs(x = "Date", y = "Cases/EMA (7 Day)")

# One frame per Date; cumulative = TRUE keeps earlier frames' data on screen.
# The subtitle shows the current date and the Cases value (column 3) picked by
# row position among that date's rows.
# NOTE(review): the row positions assume rows within a date are ordered
# ACT, NSW, VIC -- confirm against how rolling1 is sorted.
p2.anim = p3 +
    transition_manual(Date, cumulative = TRUE) +
    ggtitle("NSW, VIC & ACT Daily Cases/EMA(7 Days) ",
        subtitle = "Date:{current_frame}<br><span style='color:#F8766D;'>NSW:{rolling1[rolling1$Date==as.Date(current_frame),3][2,1]} </span> | <span style='color:#00BA38;'>VIC:{rolling1[rolling1$Date==as.Date(current_frame),3][3,1]}</span> | <span style='color:#619CFF;'> ACT:{rolling1[rolling1$Date==as.Date(current_frame),3][1,1]}</span>")

# Render to a 720 x 720 GIF at 8 fps with a long pause on the final frame
anim_p2 = animate(p2.anim, fps = 8, start_pause = 1, end_pause = 30, detail = 2,
    rewind = FALSE, width = 720, height = 720, res = 140, renderer = gifski_renderer())
anim_save(filename = "covid_aus_cases_EMA_aug30_.gif", animation = anim_p2)
data:image/s3,"s3://crabby-images/ef191/ef191d1eefb46d84f5cb8eeb147888ba1677ade5" alt="Animated Graph (COVID-19 AU)"
Figure 6.5: Animated Graph (COVID-19 AU)
6.3 Plot Maps
- We can also plot the data on a map (Figure 6.6)
# take the last day's data from d2
d2 = d2[d2$Date == max(d2$Date), ]
# world polygon outlines from the maps package
world = map_data("world")
# base map: white-filled polygons outlined per region, legend hidden
w1 = ggplot() +
    geom_polygon(data = world, aes(color = region, x = long, y = lat, group = group),
        fill = "white") +
    theme_map() +
    theme(legend.position = "none") +
    scale_fill_brewer(palette = "Blues")
# overlay one point per location, sized by case count and coloured by country
map1 = w1 +
    geom_point(aes(x = Long, y = Lat, size = Cases, colour = Country), data = d2) +
    labs(title = paste("COVID-19 Cases as of ", as.character(unique(d2$Date))))
# static version
map1
data:image/s3,"s3://crabby-images/ccf8c/ccf8c820d254ffd1d58ec61ae5213d33a2d86f2d" alt="Map"
Figure 6.6: Map
- Interactive version using plotly (Figure 6.7)
# interactive version: the tooltip shows only the colour (Country) and size
# (Cases) aesthetics
map2 = ggplotly(map1, originalData = FALSE, tooltip = c("colour", "size"), width = 750)
map2
Figure 6.7: Interactive Map
# To save:
# htmlwidgets::saveWidget(map2, file = 'map2.html')