# animated-word-cloud.R
# Code used to make the graph found within "Animated Word Clouds".
library(tidyverse)
library(dplyr)
library(gganimate)
library(ggwordcloud)
library(stopwords)
library(tokenizers)
library(ggplot2)
# Load the raw headline table and keep the New York Times rows that have a
# headline and fall before 2023, tagging each with the first day of its month.
headlines <- read_csv("headlines.csv")

nyt_headlines <- headlines |>
  filter(Publication == "New York Times") |>
  # Date is parsed with the %Y%m%d format; it goes through as.character()
  # first so the value can be matched against that format.
  mutate(Date = as.Date(as.character(Date), format = "%Y%m%d")) |>
  filter(!is.na(Headline)) |>
  mutate(
    # Year + month of the headline followed by a fixed day of "01".
    Year_Month = as.Date(
      paste0(format(Date, "%Y"), format(Date, "%m"), "01"),
      format = "%Y%m%d"
    )
  ) |>
  filter(Year_Month < "2023-01-01")
# Break every month's headlines into individual words: the result has one row
# per (Year_Month, word occurrence) with English stop words removed.
#
# The month's headlines are pasted into a single string and tokenised once.
# The tokens are kept as a list-column (one entry per month) and expanded with
# unnest(); returning several rows per group straight from summarize() is
# deprecated in recent dplyr releases, and this also avoids dropping the
# helper column by position.
nyt_headlines <- nyt_headlines |>
  group_by(Year_Month) |>
  summarize(
    Words = tokenize_words(
      paste(Headline, collapse = " "),
      stopwords = stopwords("en")
    ),
    .groups = "drop"
  ) |>
  # One row per token; the result is left ungrouped.
  unnest(cols = Words)
# Count how often each word occurs in each month.
# `.groups = "drop_last"` spells out what summarize() does by default (the
# result stays grouped by Words), which also silences the regrouping message.
word_count <- nyt_headlines |>
  group_by(Words, Year_Month) |>
  summarize(count = n(), .groups = "drop_last")
# Five most frequent words of every month.
# Before ranking, drop: the publication's own name, a fixed list of excluded
# terms, tokens containing anything other than ASCII letters, digits or
# spaces, empty tokens, and single-character tokens.
word_count_five <- word_count |>
  filter(
    !(tolower(Words) %in% c(
      "new", "york", "times", "nyt",
      "min", "read", "review", "editorial", "now",
      "comments", "getty", "images", "theater",
      "movie", "slide", "show", "television", "books",
      "op", "ed"
    )),
    !grepl("[^A-Za-z0-9 ]", Words),
    Words != "",
    nchar(Words) > 1
  ) |>
  group_by(Year_Month) |>
  # arrange() sorts the whole table by count; slice_head() then takes the
  # first five rows within each month.
  arrange(desc(count)) |>
  slice_head(n = 5)
# Single most frequent word of every month.
# word_count_five already holds each month's top words (same filters, sorted
# by descending count), so the top word is simply the first row per month.
# Deriving it from there keeps the filtering rules in one place instead of
# maintaining a second copy of the whole pipeline.
word_count_one <- word_count_five |>
  group_by(Year_Month) |>
  slice_head(n = 1)
# Animated word cloud: one frame per month showing that month's top five
# words, sized by how often they occur.
# Fixed RNG seed -- presumably so the word-cloud layout is reproducible.
set.seed(52)
word_cloud <- word_count_five |>
  ggplot(aes(label = Words, size = count)) +
  geom_text_wordcloud() +
  theme_minimal() +
  scale_size_area(max_size = 20) +
  transition_time(Year_Month) +
  labs(
    # The braces are evaluated by gganimate for every frame and turn the frame
    # time into "<Month name> <Year>". The literal deliberately continues at
    # column 0 so its contents stay unchanged.
    subtitle = "{paste(month.name[as.numeric(format(as.Date(frame_time),\"%m\"))],
format(as.Date(frame_time),\"%Y\"))}",
    title = "5 Most-Used Words In NYT Headlines Each Month (2007-2022)",
    caption = "Nikhil Chinchalkar for Princeton University | New York Times | 2024"
  ) +
  theme(
    plot.title = ggtext::element_markdown(size = 16, hjust = 0.5, face = "bold"),
    # `face` belongs to element_markdown(); it used to sit outside the call and
    # was handed to theme() as an unknown element, so the subtitle was never
    # rendered bold.
    plot.subtitle = ggtext::element_markdown(size = 20, hjust = 0.5, face = "bold")
  )
# Make sure the month column is of class Date; year_log() compares it against
# Date values when looking up the word for a frame.
word_count_one$Year_Month <- as.Date(word_count_one$Year_Month)
#' Render one frame of the month-by-year "word log" and save it as a PNG.
#'
#' The frame contains the top word of `date`'s month placed at
#' (x = year, y = 12 - month), plus whatever axis labels and title
#' `table_axis()` and `table_titles()` contribute for that date.
#'
#' Reads the global `word_count_one`; `year()`, `month()` and `floor_date()`
#' are expected to be attached (lubridate).
#'
#' @param date A single Date identifying the month to draw.
#' @param index Frame number; zero-padded to three digits for the file name
#'   `image_sequence/<index>.png`.
#' @return The value of `ggsave()` (called for its side effect of writing the
#'   PNG).
year_log <- function(date, index) {
  print(date)

  # Word for this month; empty when the month has no entry.
  month_word <- word_count_one$Words[
    which(word_count_one$Year_Month == floor_date(date, "month"))
  ]

  # annotate() draws the label once. geom_text() with constant aesthetics
  # would draw one copy per row of the plot data, stacking identical labels
  # on top of each other. A missing word simply adds no layer (NULL).
  word_layer <- NULL
  if (length(month_word) > 0) {
    word_layer <- annotate(
      "text",
      x = year(date),
      y = 12 - month(date),
      label = month_word,
      fontface = "plain"
    )
  }

  plot <- ggplot(word_count_one) +
    table_axis(date) +
    table_titles(date) +
    word_layer +
    xlim(2005, 2023) +
    ylim(-1, 13) +
    theme_void()

  # ggsave() does not reliably create missing folders, so make sure it exists.
  dir.create("image_sequence", showWarnings = FALSE)
  ggsave(
    filename = paste0(
      "image_sequence/",
      formatC(index, width = 3, format = "d", flag = "0"),
      ".png"
    ),
    plot = plot,
    width = 15,
    height = 7,
    units = "in",
    dpi = 150
  )
}
#' Axis labels to add to the frame for `date`.
#'
#' * During 2007, the month name is written at x = 2006 on the month's row
#'   (y = 12 - month).
#' * In every January, the year is written at y = 12 above that year's column.
#'
#' Labels are drawn with `annotate()` so each one is drawn exactly once, and
#' when no label applies an empty list is returned instead of a placeholder
#' text layer positioned outside the plot limits.
#'
#' @param date A single Date identifying the frame's month.
#' @return A (possibly empty) list of ggplot2 layers; add it to a plot with `+`.
table_axis <- function(date) {
  layers <- list()

  if (year(date) == 2007) {
    month_label <- annotate(
      "text",
      x = 2006,
      y = 12 - month(date),
      label = months(date),
      fontface = "bold",
      color = "gray"
    )
    layers <- c(layers, list(month_label))
  }

  if (month(date) == 1) {
    year_label <- annotate(
      "text",
      x = year(date),
      y = 12,
      label = year(date),
      fontface = "bold",
      color = "gray"
    )
    layers <- c(layers, list(year_label))
  }

  layers
}
#' Title layer for the word log; only produced for the 2007-01-01 frame.
#'
#' The title is drawn with `annotate()` so it is drawn exactly once rather
#' than once per row of the plot data.
#'
#' @param date A single Date identifying the frame's month.
#' @return A ggplot2 text layer for 2007-01-01, otherwise `NULL` (adding
#'   `NULL` to a plot leaves it unchanged).
table_titles <- function(date) {
  if (date != as.Date("2007-01-01")) {
    return(NULL)
  }
  annotate(
    "text",
    x = 2014.5,
    y = 13,
    label = "Most-Used Word In NYT Headlines Each Month",
    fontface = "bold",
    color = "black",
    size = 8
  )
}
#' Render the word-log frames for every month from `start_date` to `end_date`.
#'
#' Calls `year_log()` once per month, numbering the frames from 0 in
#' chronological order.
#'
#' @param start_date Date of the first month to render.
#' @param end_date Date of the last month to render (inclusive). Defaults to
#'   2022-12-01, the value that used to be hard-coded.
#' @return `NULL`, invisibly; called for the files written by `year_log()`.
combined_year_log <- function(start_date, end_date = as.Date("2022-12-01")) {
  curr_date <- start_date
  index <- 0
  while (curr_date <= end_date) {
    year_log(curr_date, index)
    index <- index + 1
    # %m+% adds a calendar month (lubridate).
    curr_date <- curr_date %m+% months(1)
  }
  invisible(NULL)
}
# ---- Assemble the final side-by-side animation ----

# Render the word-log PNG frames (timed), then stitch them into a GIF.
system.time(combined_year_log(as.Date("2007-01-01")))
png_files <- sort(list.files("image_sequence", pattern = ".*png$", full.names = TRUE))
gifski::gifski(
  png_files,
  gif_file = "animation.gif",
  width = 1500 * 1.5,
  height = 700 * 1.5,
  delay = 1
)

# Load both animations as magick image stacks.
mgif_word_log <- magick::image_read("animation.gif")
mgif_word_cloud <- magick::image_read(
  animate(
    word_cloud,
    fps = 4, duration = 48, height = 7, width = 7, units = "in", res = 150
  )
)

# 192 frames = 4 fps * 48 s for the word cloud, and one frame per month from
# 2007-01 through 2022-12 for the word log.
n_frames <- 192
# Number of extra copies of the last frame appended at the end.
n_hold <- 30

# Fail early with a clear message instead of a subscript error mid-way.
stopifnot(
  length(mgif_word_cloud) >= n_frames,
  length(mgif_word_log) >= n_frames
)

# Pair frame i of the cloud with frame i of the log (side by side), then
# repeat the final pair n_hold times. Building the list first and joining it
# once avoids growing the image stack with c() on every iteration.
frame_order <- c(seq_len(n_frames), rep(n_frames, n_hold))
combined_frames <- lapply(frame_order, function(i) {
  magick::image_append(c(mgif_word_cloud[i], mgif_word_log[i]))
})
new_gif <- magick::image_join(combined_frames)

# Write the stack, read it back, set the playback rate and write it again.
magick::image_write(new_gif, path = "final.gif", format = "gif")
new_gif <- magick::image_read("final.gif")
new_gif <- magick::image_animate(new_gif, fps = 4)
magick::image_write(new_gif, path = "final.gif", format = "gif")
# Last updated