library(RCurl)
library(reshape)
library(htmltab)
library(ggplot2)
library(stringr)
library(scales)
# Download the Wikipedia polling page and parse one of its HTML tables.
# NOTE(review): ssl.verifyPeer = FALSE is forwarded to RCurl; this presumably
# turns off TLS peer-certificate verification -- confirm that is intended.
theurl <- getURL(
  url = "https://en.wikipedia.org/wiki/Nationwide_opinion_polling_for_the_United_States_presidential_election,_2012",
  ssl.verifyPeer = FALSE
)
# which = 2 selects the second table found in the document.
table <- htmltab(doc = theurl, which = 2)
# ---- Clean the scraped table into typed columns ----
# Keep four columns by position and name them Date / Obama / Romney / Size.
# The first row is dropped (presumably a repeated header row -- verify against
# the scraped table). Negative indexing is used instead of 2:nrow(table) so a
# table with fewer than two rows yields zero rows instead of indexing backwards.
df <- table[-1, c(2, 3, 4, 6)]
names(df) <- c("Date", "Obama", "Romney", "Size")

# Collapse a polling period to its last day so it parses as a single date:
#   "October 1–3, 2012"              -> "October 3, 2012"
#   "September 30 – October 2, 2012" -> "October 2, 2012"
df$Date <- sub("[0-9]+–([0-9]+)", "\\1", df$Date)
df$Date <- sub(".*–", "", df$Date)  # drop everything up to an en dash
df$Date <- sub(".*-", "", df$Date)  # same for an ASCII hyphen
df$Date <- trimws(df$Date)
# %B matches month names in the current locale, so this expects English names.
df$Date <- as.Date(df$Date, format = "%B %d, %Y")

# Convert percentage strings such as "47%" into fractions (0.47).
df[c("Obama", "Romney")] <- lapply(
  df[c("Obama", "Romney")],
  function(share) as.numeric(sub("%", "", share)) / 100
)

# Sample size: strip every thousands separator (gsub, not sub, so that
# "1,200,000" is not silently truncated to 1200), drop a leading "≤", and
# discard whatever follows the first run of digits.
df$Size <- gsub(",", "", df$Size, fixed = TRUE)
df$Size <- sub("≤", "", df$Size)
df$Size <- sub("([0-9]+).*$", "\\1", df$Size)
df$Size <- as.numeric(trimws(df$Size))

# Error is defined as 1 / sqrt(sample size).
df$Error <- 1 / sqrt(df$Size)
# ---- Reshape to long format and define plot constants ----
# Melt the candidate columns into rows, keeping Date, Error and Size as
# identifier columns; the two melted columns (positions 4 and 5) are renamed.
mdata <- melt(df, id.vars = c("Date", "Error", "Size"))
colnames(mdata)[c(4, 5)] <- c("Candidate", "Support")
results <- mdata

# Candidate names and the colour values handed to the manual colour scale.
labels <- c("Obama", "Romney")
colors <- c("#3333FF", "#FF3333")

# Two-row frame (one row per candidate) dated 2012/11/6, drawn on top of the
# polls. Error is fixed at 1, presumably so the inherited 1/Error aesthetics
# can still be evaluated for these rows.
election <- data.frame(
  Date = rep(as.Date("2012/11/6"), times = 2),
  Support = c(0.511, 0.472),
  Candidate = labels,
  Error = rep(1, 2)
)
# ---- Build the chart ----
# One point per poll and candidate, a smoothed trend per candidate, and
# markers plus text labels for the rows of `election`.
# Both point size and smoothing weight are mapped to 1/Error, i.e. sqrt(Size).
d <- ggplot(results, aes(x = Date, y = Support, colour = Candidate,
                         size = 1 / Error, weight = 1 / Error)) +
  geom_point(alpha = 0.7) +
  # TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
  geom_smooth(span = 0.8, show.legend = FALSE) +
  scale_colour_manual(values = colors) +
  labs(title = "Nationwide opinion polling for the 2012 U.S. presidential election") +
  # Legend labels square the mapped value, turning sqrt(Size) back into Size.
  scale_size_area(max_size = 4,
                  labels = function(x) x^2,
                  name = "Sample Size") +
  # Major gridlines every 5 points, minor every 1 point, shown as percentages.
  scale_y_continuous(breaks = seq(0, 1, 0.05),
                     minor_breaks = seq(0, 1, 0.01),
                     labels = percent,
                     limits = c(0.35, 0.55)) +
  # Weekly major breaks, daily minor breaks, over 2012/9/10 .. 2012/11/6.
  scale_x_date(labels = date_format("%b %d"),
               breaks = c(seq(as.Date("2012/9/11"), as.Date("2012/11/6"), "week")),
               minor_breaks = seq(as.Date("2012/9/10"), as.Date("2012/11/6"), "day"),
               limits = c(as.Date("2012/9/10"), as.Date("2012/11/6"))) +
  theme(panel.grid.minor = element_line(size = 0.2),
        panel.grid.major = element_line(size = 0.8)) +
  # Election rows: an open diamond (shape 5) with a smaller point inside it.
  geom_point(data = election, size = 3, shape = 5, show.legend = FALSE) +
  geom_point(data = election, size = 2, show.legend = FALSE) +
  # Black text labels next to the election markers.
  geom_text(data = election, show.legend = FALSE,
            aes(label = c("51.1", "47.2")),
            size = 3, hjust = -0.2, vjust = -0.4, color = "#000000")
# Save the plot as "us2012.svg".
svg(filename = "us2012.svg",
    width = 9,
    height = 4,
    pointsize = 12,
    bg = "transparent")
# Print explicitly: a bare `d` is only drawn through top-level auto-printing,
# which does not happen when the script is run via source() with its default
# settings, so the device would be closed with nothing drawn on it.
print(d)
dev.off()