Jump to content

User:Wizmut

From Wikipedia, the free encyclopedia

Below is some source code to assist in making tables and graphs.

The language I use is the R (programming language). I sometimes pull data directly from a url, but will download large files or files not directly downloadable.

After I have finalized a csv file, I do some touching up in Excel/Sheets/Libre to get the formatting just right. I then use a tool[1] to convert spreadsheet data into wikitext.

Energy

[edit]

US electricity

[edit]

This code pulls data directly from the Energy Information Administration.

Used in these articles:

US state table code

[edit]
R code for US state electricity/carbon data
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)
library(tidyr)

######################
#### import data #####
######################

## state abbreviations
state_ST = read.csv("https://worldpopulationreview.com/static/states/abbr-name.csv",
                    header=F, col.names=c("ST","State"))
#fix a capitalized "of"
state_ST[state_ST$ST == "DC","State"] = "District of Columbia"

## EIA data https://www.eia.gov/electricity/data/state/
# capacity
httr::GET("https://www.eia.gov/electricity/data/state/existcapacity_annual.xlsx", 
          httr::write_disk(cap <- tempfile(fileext = ".xlsx")))

state_cap = readxl::read_excel(cap,skip=1) %>%
  filter(Year == 2022, `Producer Type` == "Total Electric Power Industry") %>%
  select(c(2,4,8)) %>%
  setNames(nm = c("ST","Source","Capacity.MW")) %>%
  mutate(Source = stringi::stri_replace_all_fixed(Source, "All Sources", "Total"))
# generation
httr::GET("https://www.eia.gov/electricity/data/state/annual_generation_state.xls", 
          httr::write_disk(gen <- tempfile(fileext = ".xls")))
state_gen = readxl::read_excel(gen,skip=1) %>%
  filter(YEAR == 2022, `TYPE OF PRODUCER` == "Total Electric Power Industry") %>%
  select(c(2,4,5)) %>%
  setNames(nm = c("ST","Source","Generation.MWh")) %>%
  mutate(Source = stringi::stri_replace_all_fixed(Source, " Conventional", ""))
# carbon
httr::GET("https://www.eia.gov/electricity/data/state/emission_annual.xlsx", 
          httr::write_disk(carbon <- tempfile(fileext = ".xlsx")))
state_carbon = readxl::read_excel(carbon,skip=0) %>%
  filter(Year == 2022, `Energy Source` == "All Sources",
         `Producer Type` == "Total Electric Power Industry") %>%
  inner_join(state_ST,c("State"="ST")) %>%
  select(c(8,5)) %>%
  setNames(nm = c("State","Carbon.MT"))
state_carbon = state_carbon %>%
  summarise(State = "United States", across(where(is.numeric), sum)) %>%
  bind_rows(state_carbon, .)

rm(cap,gen,carbon)

##############################
#### combine all the data ####
##############################

target = c("Natural Gas", "Other Gases","Hydroelectric","Pumped Storage","Wood and Wood Derived Fuels","Other Biomass","Petroleum","Solar Thermal and Photovoltaic")
replacements = c("Gas","Gas","Hydro","Hydro","Biomass","Biomass","Oil","Solar")
state_elec = state_cap %>% 
             inner_join(state_gen) %>%
             inner_join(state_ST,c("ST"="ST")) %>%
             mutate(Source = stringi::stri_replace_all_fixed(Source, target, replacements, vectorize_all=F)) %>%
             tidyr::pivot_longer(cols = c(Capacity.MW, Generation.MWh),
                                   names_to = "Category",
                                   values_to = "Value") %>%
             group_by(Source,State,Category) %>%
             summarize(Value = sum(Value)) %>% ungroup()
rm(target,replacements)

################
#### tables ####
################

## by state and source (TWh)
state_TWh = state_elec %>%
  filter(Category == "Generation.MWh", Source != "Other") %>%
  mutate(Value = Value/1000^2) %>% # turn figures into terawatt-hours
  # mutate(Value = round(Value,3)) %>% # optionally round everything off
  select(c("State","Source","Value")) %>%
  tidyr::pivot_wider(names_from = Source, values_from = Value) %>%
  mutate_all(~replace(., is.na(.), 0)) %>%
  bind_rows(summarize(.,
                      across(where(is.numeric), sum),
                      across(where(is.character), ~"United States"))) %>%
  select(c(1,10,4,3,7,11,6,9,2,8,5)) %>%
  arrange(desc(Total))
write.csv(state_TWh, "state_TWh_by_source_2022.csv", row.names=F)

## by state and source (percent)
state_Pct = state_TWh %>%
  select(-Total) %>%
  mutate(across(where(is.numeric), ~. / state_TWh$Total)) %>%
  mutate(Pct.of.US = state_TWh$Total / max(state_TWh$Total)) %>%
  select(c(1,11,2:10)) %>%
  mutate(across(where(is.numeric), ~scales::label_percent(scale = 1)(round(.*100,0))))
write.csv(state_Pct, "state_Pct_by_source_2022.csv", row.names=F)

## percent renewable
renew_pct = state_TWh %>%
  mutate(Renew_TWh = Wind+Hydro+Solar+Biomass+Geothermal) %>%
  mutate(Renew_P = Renew_TWh/Total) %>%
  mutate(P_US_Renew = Renew_TWh/max(Renew_TWh)) %>%
  select(State,Total,Renew_P,Renew_TWh,P_US_Renew) %>%
  inner_join(state_carbon) %>%
  mutate(CO2_MT_TWh = Carbon.MT/Total/1000) %>%
  select(-Carbon.MT)
write.csv(renew_pct, "renew_pct_by_source_2022.csv", row.names=F)

US state map code

[edit]

The following code creates some treemaps and maps of the US that illustrate the data in the tables created above.

R code for US treemaps and maps
##################
#### treemaps ####
##################

library(ggplot2)
library(treemapify)

type.palette = c("#804000","#303030","#0fafff",
                 "#a82000","#009080","#d0b090",
                 "#403020","#d8d800","#d0d0d0")
names(type.palette) = c("Biomass","Coal","Nuclear",
                        "Geothermal","Hydro","Gas",
                        "Oil","Solar","Wind")

treemap.theme = theme(legend.position="none",
        panel.border = element_rect(color="#404040", fill=NA, linewidth=1),
        plot.title = element_text(size=40, family="serif",color='#e6e6e6',hjust=.5),
        plot.background = element_rect(fill = '#181818', color = '#404040'),
        plot.margin = unit(c(0,0,0,0), "cm"))

treemap_elec = state_elec %>%
  filter( !(Source %in% c("Total","Other")) ) %>%
  filter(Category == "Generation.MWh") %>%
  group_by(State) %>%
  mutate(State.total = sum(Value)/1000^2) %>%
  group_by(Source) %>%
  mutate(Label = if_else(Value == max(Value), Source, "")) %>% ungroup()

## by state and source
ggplot(treemap_elec, aes(area = Value, fill = Source, subgroup = State)) +
  scale_fill_manual("Energy source",values = type.palette) +
  
  geom_treemap(start="topleft") +
  geom_treemap_subgroup_border(start="topleft", color="#404040", size=2) +
  
  ggfx::with_outer_glow(geom_treemap_text(start="topleft", place="bottom", reflow=T, color="#c49000",
                                          label=treemap_elec$Label,
                                          size=2*treemap_elec$State.total^.3, min.size = 1),
                        sigma=1, expand=1) +
  ggfx::with_outer_glow(geom_treemap_subgroup_text(start="topleft", place="topleft", reflow=F, color="#d0d0d0",
                                                   size = 6*treemap_elec$State.total^.25),
                        sigma=1, expand=2) +
  treemap.theme
ggsave("US_generation_treemap_2022_state_source.png",width=8,height=5)

## by source and state
ggplot(treemap_elec, aes(area = Value, fill = Source, subgroup = Source, subgroup2 = State)) +
  scale_fill_manual("Energy source",values = type.palette) +

  geom_treemap(start="topleft") +
  geom_treemap_subgroup_border(start="topleft", color="#404040", size=2) +
  
  ggfx::with_outer_glow(geom_treemap_text(start="topleft", place="bottom", reflow=T, color="#c49000",
                                          label=treemap_elec$State,
                                          size=2*treemap_elec$State.total^.3, min.size = 1),
                        sigma=1, expand=1) +
  ggfx::with_outer_glow(geom_treemap_subgroup_text(start="topleft", place="topleft", reflow=F, color="#d0d0d0",
                                                   size = 32),
                        sigma=1, expand=2) +
  
  treemap.theme
ggsave("US_generation_treemap_2022_source_state.png",width=8,height=5)


##############
#### maps ####
##############

library(maps)
library(usmap)

map.theme = theme_void() +
            theme(plot.background = element_rect(fill = '#202020', color = '#404040'),
                  legend.position = c(.8,.04), legend.direction = "horizontal", legend.justification = c(1,0),
                  legend.key.size = unit(1, "cm"), legend.key.height = unit(.7, "cm"),
                  text = element_text(color="#e6e6e6", face="bold", size = 15))
# map.guide = guides(fill = guide_colorbar(title.hjust = .5, title.position = "top"))
# map.colors = c("#c0f0f0","#004800","#c0c030","#801010","#100010")


# find the largest source in each state
state_majors = state_twh %>%
  mutate(Major = colnames(.)[-c(1,2,12)][max.col(.[,-c(1,2,12)], ties.method = "first")]) %>%
  mutate(Renew = colnames(.)[c(6:9,11)][max.col(.[,c(6:9,11)], ties.method = "first")]) %>%
  select(State,Major,Renew,Salient)

state_majors %>% count(Major)
state_majors %>% count(Renew)

state_majors_map = state_majors %>%
  inner_join( us_map()[c(1,2,6,9)] ,c("State"="full"))

# map of major overall source
ggplot(state_majors_map, aes(fill=Major)) +
  geom_polygon(aes(x,y, group=group), color='white') +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("State_electric_major_overall_2022.png",width=8.5,height=6)

# map of major renewable source
ggplot(state_majors_map, aes(fill=Renew)) +
  geom_polygon(aes(x,y, group=group), color='white') +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("State_electric_major_renew_2022.png",width=8.5,height=6)

# map of most salient source
state_salient = state_twh %>%
  mutate(across(3:11, ~./Total * 100, .names = "{.col}")) %>%
  select(-Total) %>%
  mutate_if(is.numeric, funs(c(first(.), (. - first(.))[-1])) ) %>%
  mutate(Salient = colnames(.)[-c(1)][max.col(.[,-c(1)], ties.method = "first")]) %>%
  inner_join( us_map()[c(1,2,6,9)] ,c("State"="full"))

ggplot(state_salient, aes(fill=Salient)) +
  geom_polygon(aes(x,y, group=group), color='white') +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("State_electric_salient_2022.png",width=8.5,height=6)

World electricity data tables and graphs

[edit]

This code pulls data directly from the Ember (non-profit organisation).

Used in these articles:

Country table code

[edit]
R code for World electricity data
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

######################
#### import data #####
######################

# download from url - may be slow (30mb file)
# httr::GET("https://ember-climate.org/app/uploads/2022/07/yearly_full_release_long_format.csv", 
#           httr::write_disk(ember_world_full <- tempfile(fileext = ".csv")))
# ember_world_full = read.csv(ember_world_full)

# after manual download, read from local folder
ember_world_full = read.csv("yearly_full_release_long_format.csv")

#######################
#### clean up data ####
#######################

targets = c(" (the)", "Bosnia Herzegovina", "Brunei Darussalam", "Cabo Verde",
            "Congo (the Democratic Republic of the)", "Cote d'Ivoire", "Falkland Islands [Malvinas]",
            "Iran (Islamic Republic of)", "Korea (the Democratic People's Republic of)",
            "Lao People's Democratic Republic", "Palestine, State of", "Russian Federation",
            "Syrian Arab Republic", "Tanzania, the United Republic of", "Timor-Leste",
            "United States of America", "Venezuela (Bolivarian Republic of)", "Viet Nam",
            "Virgin Islands (British)", "Virgin Islands (U.S.)")
replacements = c("", "Bosnia and Herzegovina", "Brunei", "Cape Verde",
                 "DR Congo", "Ivory Coast", "Falkland Islands",
                 "Iran", "North Korea",
                 "Laos", "Palestine", "Russia",
                 "Syria", "Tanzania", "East Timor",
                 "United States", "Venezuela", "Vietnam",
                 "British Virgin Islands", "US Virgin Islands")
ember_world_trim = ember_world_full %>%
  filter(Year >= max(Year)-1) %>% # last two years
  filter(Area.type == "Country" | Area == "World") %>%
  filter(Subcategory != "Aggregate fuel") %>%
  select(Area, Area.type, Year, Category, Subcategory, Variable, Unit, Value, YoY...change) %>%
  mutate(Area = stringi::stri_replace_all_fixed(Area, targets, replacements, vectorize_all = F))
rm(targets,replacements)

######################
#### world tables ####
######################

# names for world tables
ember_world = ember_world_trim %>%
  filter(Year == max(Year)-1) %>% # most recent complete year
  mutate(Area = paste0("{{flagg|uspef|pref=Electricity sector in|",Area,"}}")) %>%
  mutate(Area = ifelse(stringr::str_detect(Area,"World"), "{{noflag|'''World'''}}", Area))

# overall table
world_overall = ember_world %>%
  filter(Variable %in% c("Total Generation","Demand","Demand per capita","Net Imports","CO2 intensity")) %>%
  select(Area, Variable, Value) %>%
  tidyr::pivot_wider(names_from = Variable, values_from = Value) %>%
  setNames(nm = c("Location","Consumption","Consumption_per_capita_MWh","Generation","Net_Imports","gCO2/kWh")) %>%
  select(Location,Generation,Consumption,Consumption_per_capita_MWh,Net_Imports,`gCO2/kWh`) %>%
  filter(Generation > 0) %>%
  arrange(desc(Generation))
write.csv(world_overall, "world_electricity_2021.csv", row.names=F)

# production by source (TWh)
world_twh = ember_world %>%
  filter(Unit == "TWh", Subcategory %in% c("Fuel","Total")) %>%
  select(Area, Variable, Value) %>%
  tidyr::pivot_wider(names_from = Variable, values_from = Value) %>%
  mutate(across(where(is.numeric), ~replace(., is.na(.), 0))) %>%
  setNames(nm = c("Location","Bio","Coal","Gas","Hydro","Nuclear","Oil","Geo","Solar","Wind","Total")) %>%
  select(Location,Total,Coal,Gas,Hydro,Nuclear,Wind,Solar,Oil,Bio,Geo) %>%
  filter(Total > 0) %>%
  arrange(desc(Total))
write.csv(world_twh, "world_twh_by_source_2021.csv", row.names=F)

# production by source (pct)
world_pct = ember_world %>%
  filter(Unit == "%" | Variable == "Total Generation") %>%
  select(Area, Variable, Value) %>%
  tidyr::pivot_wider(names_from = Variable, values_from = Value) %>%
  mutate(across(where(is.numeric), ~replace(., is.na(.), 0))) %>%
  setNames(nm = c("Location","Bio","Coal","Gas","Hydro","Nuclear","Oil","Geo","Solar","Wind","Total")) %>%
  select(Location,Total,Coal,Gas,Hydro,Nuclear,Wind,Solar,Oil,Bio,Geo) %>%
  filter(Total > 0) %>%
  arrange(desc(Total)) %>%
  mutate(Total = Total / max(Total))
write.csv(world_pct, "world_pct_by_source_2021.csv", row.names=F)

# renewable (TWh)
renew_twh = world_twh %>%
  mutate(Renew = Hydro + Wind + Solar + Bio + Geo) %>%
  select(Location,Renew,Hydro,Wind,Solar,Bio,Geo) %>%
  arrange(desc(Renew))
write.csv(renew_twh, "renew_twh_by_source_2021.csv", row.names=F)

# renewable (pct)
renew_pct = world_pct %>%
  mutate(Renew = renew_twh$Renew / world_twh$Total) %>%
  select(Location,Renew,Hydro,Wind,Solar,Bio,Geo) %>%
  arrange(desc(Renew))
write.csv(renew_pct, "renew_pct_by_source_2021.csv", row.names=F)

# rm(ember_world,world_overall,world_twh,world_pct,renew_twh,renew_pct)

#######################
#### source tables ####
#######################

get_source_table = function(source,source_name,link_name) {
  if(missing(link_name)) { link_name = "missing" }
  
  source_table = ember_world_trim %>%
    filter(Variable == source, Unit != "mtCO2") %>%
    select(Area, Year, Unit, Value, YoY...change) %>%
    tidyr::pivot_wider(names_from = Unit, values_from = c(Value, YoY...change)) %>%
    select(c(1:6)) %>%
    mutate(across(where(is.numeric), ~replace(., is.na(.), 0))) %>%
    setNames(nm = c("Area","Year","Cap_GW","Pct_gen","Gen_TWh","Pct_cap_growth")) %>%
    select(Area,Year,Gen_TWh,Pct_gen,Cap_GW,Pct_cap_growth) %>%
    mutate(Cap_fac = Gen_TWh/Cap_GW*1000/365/24) %>%
    group_by(Area) %>% slice_max(order_by = Year) %>% ungroup() %>%
    filter(Gen_TWh > 0.1) %>%
    arrange(desc(Gen_TWh)) %>%
    mutate(Location = link_name) %>%
    mutate(Location = ifelse(Location == "missing",
                         paste0("{{flag|",Area,"}}"),
                         paste0("{{flagg|uspef|pref=",link_name,"|",Area,"}}")
                         )) %>%
    mutate(Location = ifelse(stringr::str_detect(Location,"World"), "{{noflag|'''World'''}}", Location)) %>%
    mutate(Location = ifelse(Year < max(Year), paste0(Location," (",Year,")"), Location)) %>%
    select(Location,Gen_TWh,Pct_gen,Cap_GW,Pct_cap_growth)

  write.csv(source_table, paste0("source_",source_name,"_2021.csv"), row.names=F)
  return (source_table)
}

ember_bio = get_source_table("Bioenergy","biomass")
ember_coal = get_source_table("Coal","coal")
ember_gas = get_source_table("Gas","gas")
ember_hydro = get_source_table("Hydro","hydro","Hydroelectricity in")
ember_nuclear = get_source_table("Nuclear","nuclear","Nuclear power in")
ember_oil = get_source_table("Other Fossil","oil") # mostly oil
ember_geo = get_source_table("Other Renewables","geo") # mostly geo
ember_solar = get_source_table("Solar","solar","Solar power in")
ember_wind = get_source_table("Wind","wind","Wind power in")

# rm(get_source_table_ember_bio,ember_coal,ember_gas,ember_hydro,
#    ember_nuclear,ember_oil,ember_geo,ember_solar,ember_wind)

Country map code

[edit]

The following code creates some treemaps and maps of the US that illustrate the data in the tables created above.

R code for World treemaps and maps
##################
#### treemaps ####
##################

library(ggplot2)
library(treemapify)

type.palette = c("#804000","#303030","#0fafff",
                 "#a82000","#009080","#d0b090",
                 "#403020","#d8d800","#d0d0d0")
names(type.palette) = c("Biomass","Coal","Nuclear",
                        "Geothermal","Hydro","Gas",
                        "Oil","Solar","Wind")

treemap.theme = theme(legend.position="none",
        panel.border = element_rect(color="#404040", fill=NA, linewidth=1),
        plot.title = element_text(size=40, family="serif",color='#e6e6e6',hjust=.5),
        plot.background = element_rect(fill = '#181818', color = '#404040'),
        plot.margin = unit(c(0,0,0,0), "cm"))

targets = c("Bioenergy","Coal","Nuclear",
            "Other Renewables","Hydro","Gas",
            "Other Fossil","Solar","Wind")
replacements = c("Biomass","Coal","Nuclear",
                 "Geothermal","Hydro","Gas",
                 "Oil","Solar","Wind")

treemap_elec = ember_world_trim %>%
  filter(Area.type == "Country", Year == 2021) %>%
  filter(Subcategory == "Fuel", Unit == "TWh") %>%
  select(Area, Variable, Value) %>%
  mutate(Variable = stringi::stri_replace_all_fixed(Variable, targets, replacements, vectorize_all = F)) %>%
  group_by(Area) %>%
  mutate(Area.total = sum(Value)) %>%
  group_by(Variable) %>%
  # mutate(Label = if_else(Value == max(Value), Variable, "")) %>%
  mutate(Label = if_else(Area == "China", Variable, "")) %>%
  ungroup()

# by country and source
ggplot(treemap_elec, aes(area = Value, fill = Variable, subgroup = Area)) +
  scale_fill_manual("Energy source",values = type.palette) +
  
  geom_treemap(start="topleft") +
  geom_treemap_subgroup_border(start="topleft", color="#404040", size=2) +
  
  ggfx::with_outer_glow(geom_treemap_text(start="topleft", place="bottom", 
                                          reflow=T, color="#c49000", 
                                          label=treemap_elec$Label, 
                                          size=1*treemap_elec$Area.total^.3, min.size = 1),
                        sigma=1, expand=1) +
  ggfx::with_outer_glow(geom_treemap_subgroup_text(start="topleft", place="topleft",
                                                   reflow=F, color="#d0d0d0",
                                                   size = 5*treemap_elec$Area.total^.25),
                        sigma=1, expand=2) +
  treemap.theme
ggsave("World_generation_treemap_2021_country_source.png",width=8,height=5)

# by source and country
ggplot(treemap_elec, aes(area = Value, fill = Variable, subgroup = Variable, subgroup2 = Area)) +
  scale_fill_manual("Energy source",values = type.palette) +
  
  geom_treemap(start="topleft") +
  geom_treemap_subgroup_border(start="topleft", color="#404040", size=2) +
  
  ggfx::with_outer_glow(geom_treemap_text(start="topleft", place="bottom", 
                                          reflow=T, color="#c49000", 
                                          label=treemap_elec$Area, 
                                          size=1*treemap_elec$Area.total^.3, min.size = 1),
                        sigma=1, expand=1) +
  ggfx::with_outer_glow(geom_treemap_subgroup_text(start="topleft", place="topleft",
                                                   reflow=F, color="#d0d0d0",
                                                   size = 32, min.size = 1),
                        sigma=1, expand=2) +
  treemap.theme
ggsave("World_generation_treemap_2021_source_country.png",width=8,height=5)


##############
#### maps ####
##############

# library(maps)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)

map.theme = theme_void() +
            theme(plot.background = element_rect(fill = '#202020', color = '#404040'),
                  legend.position = c(.8,.04), legend.direction = "horizontal", legend.justification = c(1,0),
                  legend.key.size = unit(1, "cm"), legend.key.height = unit(.7, "cm"),
                  text = element_text(color="#e6e6e6", face="bold", size = 15))
# map.guide = guides(fill = guide_colorbar(title.hjust = .5, title.position = "top"))
# map.colors = c("#c0f0f0","#004800","#c0c030","#801010","#100010")


# remake the TWh table
country_twh = ember_world_full %>%
  filter(Area.type == "Country", Unit == "TWh", Subcategory %in% c("Fuel","Total")) %>%
  filter(Year == max(Year)-1) %>%
  select(Country.code, Variable, Value) %>%
  tidyr::pivot_wider(names_from = Variable, values_from = Value) %>%
  mutate(across(where(is.numeric), ~replace(., is.na(.), 0))) %>%
  setNames(nm = c("ISO","Bio","Coal","Gas","Hydro","Nuclear","Oil","Geo","Solar","Wind","Total")) %>%
  select(ISO,Total,Coal,Gas,Hydro,Nuclear,Wind,Solar,Oil,Bio,Geo) %>%
  filter(Total > 0) %>%
  arrange(desc(Total))

# find the largest source in each state
country_majors = country_twh %>%
  mutate(Major = colnames(.)[-c(1,2)][max.col(.[,-c(1,2)], ties.method = "first")]) %>%
  mutate(Renew = colnames(.)[c(5,7,8,10,11)][max.col(.[,c(5,7,8,10,11)], ties.method = "first")]) %>%
  select(ISO,Major,Renew)

country_majors %>% count(Major)
country_majors %>% count(Renew)

# import world map
sf_use_s2(FALSE) # let st_area work
world_map = rnaturalearth::ne_countries(scale = "medium", returnclass = "sf") %>%
  mutate(Area.km2 = units::set_units(st_area(geometry), NULL)/1000^2) %>%
  filter(Area.km2 > 5000)
# fix somalia
somalia_combined = world_map[world_map$sov_a3 %in% c("SOM","SOL"),]
world_map[world_map$sov_a3 == "SOM",]$geometry = sf::st_union(somalia_combined)
# merge map and table
country_majors_map = merge(world_map, country_majors, by.x="iso_a3", by.y="ISO")

# map of major overall source
ggplot() +
  geom_sf(data = country_majors_map, aes(fill = Major), color = "white") +
  coord_sf(crs = "+proj=robin") +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("World_electric_major_overall_2021.png",width=8,height=3.4)

# map of major renewable source
ggplot() +
  geom_sf(data = country_majors_map, aes(fill = Renew), color = "white") +
  coord_sf(crs = "+proj=robin") +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("World_electric_major_renew_2021.png",width=8,height=3.4)

# map of most salient source
country_salient = country_twh %>%
  mutate(across(3:11, ~./Total * 100, .names = "{.col}")) %>%
  select(-Total) %>%
  mutate_if(is.numeric, funs(c(first(.), (. - first(.))[-1])) ) %>%
  mutate(Salient = colnames(.)[-c(1)][max.col(.[,-c(1)], ties.method = "first")]) %>%
  inner_join(world_map,c("ISO"="iso_a3")) %>% st_as_sf

ggplot() +
  geom_sf(data = country_salient, aes(fill = Salient), color = "white") +
  coord_sf(crs = "+proj=robin") +
  scale_fill_manual(values = type.palette) +
  guides(fill="none") + map.theme
ggsave("World_electric_salient_2021.png",width=8,height=3.4)

Crime and mortality

[edit]

US state crime

[edit]

This code uses data downloaded from the Federal Bureau of Investigation.

Used in these articles:

R code for US state crime data
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# function to set flag links
fix_links = function(df) {
  df = df %>%
  mutate(state_name = paste0("{{flagg|uspeft|pref=Crime in|",state_name,"}}")) %>%
  mutate(state_name = ifelse(stringr::str_detect(state_name,"United States"),
                        "{{noflag|'''United States'''}}", state_name)) %>%
  mutate(state_name = ifelse(stringr::str_detect(state_name,"Georgia"),
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", state_name)) %>%
  mutate(state_name = ifelse(stringr::str_detect(state_name,"District"),
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", state_name))
  return(df)
}

#####################
#### import data ####
#####################

# FBI data
# https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/downloads
# Additional Datasets > Summary Reporting System (SRS) > Download

crime = read.csv("estimated_crimes_1979_2022.csv") %>%
  filter(!state_name %in% c("","United States Total")) %>%
  select(-c(state_abbr,rape_legacy,property_crime:caveats))

# find totals for the whole US
us = crime %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), sum, na.rm=T)) %>%
  ungroup() %>%
  mutate(state_name = "United States")

crime = crime %>%
  bind_rows(us)

######################
#### crime totals ####
######################

totals = crime %>%
  filter(year %in% 2018:2022) %>%
  fix_links()

violent_total_yearly = totals %>%
  select(year,state_name,violent_crime) %>%
  tidyr::pivot_wider(names_from = year, values_from = violent_crime) %>%
  arrange(desc(`2022`)) %>%
  arrange(-stringr::str_detect(state_name, "United States"))
write.csv(violent_total_yearly, "violent_total_yearly.csv", row.names=F)

homicide_total_yearly = totals %>%
  select(year,state_name,homicide) %>%
  tidyr::pivot_wider(names_from = year, values_from = homicide) %>%
  arrange(desc(`2022`)) %>%
  arrange(-stringr::str_detect(state_name, "United States"))
write.csv(homicide_total_yearly, "homicide_total_yearly.csv", row.names=F)

#####################
#### crime rates ####
#####################

rate = crime %>%
  mutate(across(!year & where(is.numeric), ~./population*10^5)) %>%
  select(-population)

rate_five = rate %>%
  filter(year %in% 2018:2022) %>%
  fix_links()

violent_rates_type = rate_five %>%
  group_by(state_name) %>%
  summarise(across(where(is.numeric), mean, na.rm=T)) %>%
  ungroup() %>%
  
  arrange(desc(violent_crime)) %>%
  arrange(-stringr::str_detect(state_name, "United States"))
write.csv(violent_rates_type, "violent_rates_type.csv", row.names=F)

violent_rates_yearly = rate_five %>%
  select(year,state_name,violent_crime) %>%
  tidyr::pivot_wider(names_from = year, values_from = violent_crime) %>%
  arrange(desc(`2022`)) %>%
  arrange(-stringr::str_detect(state_name, "United States"))
write.csv(violent_rates_yearly, "violent_rates_yearly.csv", row.names=F)

homicide_rates_yearly = rate_five %>%
  select(year,state_name,homicide) %>%
  tidyr::pivot_wider(names_from = year, values_from = homicide) %>%
  arrange(desc(`2022`)) %>%
  arrange(-stringr::str_detect(state_name, "United States"))
write.csv(homicide_rates_yearly, "homicide_rates_yearly.csv", row.names=F)

homicide_rates_decade = rate %>%
  filter(year > 1979) %>%
  mutate(decade = paste0(floor(year/10)*10,"s")) %>%
  select(decade,state_name,homicide) %>%
  
  group_by(decade,state_name) %>%
  summarise(across(where(is.numeric),mean)) %>%
  ungroup %>%
  
  tidyr::pivot_wider(names_from = decade, values_from = homicide) %>%
  fix_links()
write.csv(homicide_rates_decade, "homicide_rates_decade.csv", row.names=F)

##############
#### maps ####
##############

library(ggplot2)

map.theme = theme_void() +
  theme(plot.background = element_rect(fill = '#202020', color = '#404040'),
        legend.position = c(.78,.04), legend.direction = "horizontal", legend.justification = c(1,0),
        legend.key.size = unit(1, "cm"), legend.key.height = unit(.7, "cm"),
        text = element_text(color="#e6e6e6", face="bold", size = 15))
map.guide = guides(fill = guide_colorbar(title.hjust = .5, title.position = "top"))

rate_map = rate %>%
  filter(year == max(year)) %>%
  inner_join(usmap::us_map(), c("state_name"="full"), multiple="all")

# violent crime
ggplot(rate_map, aes(state_name, fill=violent_crime)) +
  geom_polygon(aes(x,y, group=group), color='white') + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Violent crimes per\n100k people",
                       limits = c(0,813), breaks = seq(0,800,200), labels = seq(0,800,200),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("state_violent_crime_rate_2022.png",width=8.5,height=6)

# homicide
ggplot(rate_map, aes(state_name, fill=homicide)) +
  geom_polygon(aes(x,y, group=group), color='white') + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Homicides per\n100k people",
                       limits = c(0,16.2), breaks = seq(0,16,4), labels = seq(0,16,4),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("state_homicide_rate_2022.png",width=8.5,height=6)

US state death

[edit]

This code uses data downloaded from the Centers for Disease Control.

Used in these articles:

R code for US state death data
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# https://wonder.cdc.gov/ucd-icd10-expanded.html
# Section 1 will vary depending on the data needed. Leave sections 2, 3, 4, 5 as default. In section 6, above the large box, see the five dots and select "Injury Intent and Mechanism"; select either "All Causes" or "Firearm". In section 7 check all boxes except "Show totals".

CDC_import = function(file_name) {
  df = read.table(file_name, header=T, sep="\t", fill=T) %>%
    filter(Notes == "") %>% select(-Notes,-Crude.Rate) %>%
    mutate(Deaths = as.numeric(ifelse(Deaths == "Suppressed", NA, Deaths))) %>%
    mutate(Rate = Deaths/Population*10^5)
  return(df)
}

# set flag links
fix_links = function(df) {
  df = df %>%
  mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
  mutate(State = ifelse(stringr::str_detect(State,"United States"),
                        "{{noflag|'''United States'''}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"Georgia"),
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"District"),
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
  return(df)
}

###################
#### gun rates ####
###################

# homicide/suicide totals
state.cide = CDC_import("intentional/CDC - State Intent External.txt") %>%
  select(State,Injury.Intent,Deaths,Population) %>%
  filter(Injury.Intent %in% c("Homicide","Suicide")) %>%
  tidyr::pivot_wider(names_from = Injury.Intent, values_from = Deaths, values_fill=0)

# gun totals
state.gun = CDC_import("intentional/CDC - State Gun.txt") %>%
  select(State,Deaths) %>%
  rename(Gun.Total = Deaths)

# gun homicide/suicide
state.intent.gun = CDC_import("intentional/CDC - State Intent Gun.txt") %>%
  select(State,Injury.Intent,Deaths) %>%
  filter(Injury.Intent %in% c("Homicide","Suicide")) %>%
  rename(Intent = Injury.Intent) %>%
  tidyr::pivot_wider(names_from = Intent, values_from = Deaths, values_fill=0, names_prefix = "Gun.")

# merge gun and general stats
state.all = CDC_import("CDC - Region Division State.txt") %>%
  inner_join(state.gun) %>%
  inner_join(state.cide) %>%
  inner_join(state.intent.gun) %>%
  bind_rows(summarise(., across(where(is.numeric), sum), State = "United States")) %>%
  mutate(across(where(is.numeric), ~./Population*10^5)) %>%
  select(State,Gun.Total,Gun.Suicide,Suicide,Gun.Homicide,Homicide) %>%
  left_join(read.csv("RAND/RAND_gun_ownership_2016.csv"))

# gun death rates
state.gun.rates = state.all %>%
  arrange(desc(Gun.Total)) %>%
  arrange(-stringr::str_detect(State, "United States")) %>%
  fix_links()
write.csv(state.gun.rates,"intentional/state_gun_rates.csv",row.names=F)

####################
#### gun totals ####
####################

# year 2021

# gun totals
state.gun.2021 = CDC_import("intentional/CDC - State Gun 2021.txt") %>%
  bind_rows(CDC_import("intentional/CDC - Gun 2021.txt")) %>%
  mutate(State = ifelse(is.na(State), "United States", State)) %>%
  select(State,Deaths) %>%
  rename(Gun.Total = Deaths)

# gun death by intent
state.intent.gun.2021.alt = CDC_import("intentional/CDC - State Intent Gun 2021.txt") %>%
  bind_rows(CDC_import("intentional/CDC - Intent Gun 2021.txt")) %>%
  mutate(State = ifelse(is.na(State), "United States", State)) %>%
  select(State,Injury.Intent,Deaths) %>%
  tidyr::pivot_wider(names_from = Injury.Intent, values_from = Deaths) %>%
  setNames(c("State","Accident","Suicide","Homicide","Unknown","Law","Non-injury")) %>%
  inner_join(state.gun.2021) %>%
  select(State,Gun.Total,Suicide,Homicide,Accident,Law) %>%
  arrange(desc(Gun.Total)) %>%
  arrange(-stringr::str_detect(State, "United States")) %>%
  mutate_all(~stringr::str_replace_na(., "")) %>%
  fix_links()
write.csv(state.intent.gun.2021.alt,"intentional/state_gun_intentional_2021.csv",row.names=F)

##################
#### homicide ####
##################

# state totals
state.homicide = state.cide %>%
  select(State,Population,Homicide) %>%
  bind_rows(setNames(data.frame("United States", sum(.$Population), sum(.$Homicide)), names(.))) %>%
  mutate(Total = Homicide/Population*10^5)

# mechanism, all US
homicide.mechanism = CDC_import("intentional/CDC - Homicide Mechanism.txt") %>%
  setNames(nm = c("Mechanism","Code","Deaths","Population","Rate")) %>%
  select(Mechanism,Rate) %>%
  filter(Rate > 0) %>%
  mutate(Mechanism = c("Stab","Drown.Other","Fall.Other","Fire","Hot.Other","Gun",
                       "Transport.Other","Poison","Struck","Choke")) %>%
  filter(!stringr::str_detect(Mechanism, "Other"))

# mechanism by state
state.homicide.mechanism = CDC_import("intentional/CDC - State Homicide Mechanism.txt") %>%
  select(State,3,Rate) %>%
  tidyr::pivot_wider(names_from = 2, values_from = Rate, values_fill=0) %>%
  select(State,2,5,7,16,17,18) %>%
  setNames(nm = c("State","Stab","Fire","Gun","Poison","Struck","Choke")) %>%
  
  bind_rows(setNames(data.frame("United States", t(homicide.mechanism$Rate)), names(.))) %>%
  inner_join(state.homicide) %>%
  select(State,Total,Gun,Stab,Choke,Struck,Poison,Fire) %>%
  arrange(desc(Total)) %>%
  arrange(-stringr::str_detect(State, "United States")) %>%
  mutate(across(where(is.numeric), round, 1)) %>%
  fix_links()
write.csv(state.homicide.mechanism,"intentional/state_homicide_mechanism.csv",row.names=F)

##############
#### maps ####
##############

library(ggplot2)

map.theme = theme_void() +
  theme(plot.background = element_rect(fill = '#202020', color = '#404040'),
        legend.position = c(.78,.04), legend.direction = "horizontal", legend.justification = c(1,0),
        legend.key.size = unit(1, "cm"), legend.key.height = unit(.7, "cm"),
        text = element_text(color="#e6e6e6", face="bold", size = 15))
map.guide = guides(fill = guide_colorbar(title.hjust = .5, title.position = "top"))

state.map = state.all %>%
  inner_join(usmap::us_map(), c("State"="full"), multiple="all") %>%
  filter(!stringr::str_detect(State, "District")) #keep DC off the scale

show_map = function(df, rate.var, word, name) {
  ggplot(state.map, aes(State, fill=.data[[rate.var]])) +
    geom_polygon(aes(x,y, group=group), color='white') + 
    map.theme + map.guide +
    scale_fill_gradientn(name = paste(word,"per\n100k people"),
                         colors = c("#ffffff","#ff8080","#800000","#000000"))
  ggsave(paste0("intentional/state_",name,"_rate.png"),width=8.5,height=6)
}

show_map(state.map, "Homicide", "Homicides", "homicide_rate")
show_map(state.map, "Suicide", "Suicides", "suicide_rate")
show_map(state.map, "Gun.Homicide", "Gun homicides", "gun_homicide_rate")
show_map(state.map, "Gun.Suicide", "Gun suicides", "gun_suicide_rate")

US county death

[edit]

This code uses data downloaded from County Health Rankings.[2]

Used in these articles:

R code for US county death data
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

######################
#### import data #####
######################

# CHR institute data
# https://www.countyhealthrankings.org/sites/default/files/media/document/analytic_data2023_0.csv
county_raw = read.csv("analytic_data2023_0.csv")

county_death = county_raw %>%
  slice(-1) %>%
  mutate(FIPS = paste0(State.FIPS.Code, County.FIPS.Code)) %>%
  select(FIPS,State.Abbreviation, Name,
         Homicides.raw.value,Suicides.raw.value,Firearm.Fatalities.raw.value,
         Motor.Vehicle.Crash.Deaths.raw.value) %>%
  setNames(nm = c("FIPS","ST","Location","Homicide","Suicide","Gun.death","Road.death")) %>%
  mutate(across(all_of(c("Homicide","Suicide","Gun.death","Road.death")), as.numeric)) %>%
  mutate(across(where(is.numeric), ~ifelse(is.na(.), 0, .))) %>%
  filter(FIPS != "000")

##############
#### maps ####
##############
library(ggplot2)

map.theme = theme_void() +
  theme(plot.background = element_rect(fill = '#202020', color = '#404040'),
        legend.position = c(.78,.04), legend.direction = "horizontal", legend.justification = c(1,0),
        legend.key.size = unit(1, "cm"), legend.key.height = unit(.7, "cm"),
        text = element_text(color="#e6e6e6", face="bold", size = 15))
map.guide = guides(fill = guide_colorbar(title.hjust = .5, title.position = "top"))

death_map = county_death %>%
  right_join(usmap::us_map(regions="counties"), c("FIPS"="fips"), multiple="all")

# homicide
ggplot(death_map, aes(State, fill=Homicide)) +
  geom_polygon(aes(x,y, group=group), color='grey', linewidth=.1) + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Homicides per\n100k people",
                       limits = c(0,50), breaks = seq(0,50,10), labels = seq(0,50,10),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("county_homicide_rate_2023.png",width=8.5,height=6)

# suicide
ggplot(death_map, aes(State, fill=Suicide)) +
  geom_polygon(aes(x,y, group=group), color='white', linewidth=0) + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Suicides per\n100k people",
                       limits = c(0,110), breaks = seq(0,100,25), labels = seq(0,100,25),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("county_suicide_rate_2023.png",width=8.5,height=6)

# gun death
ggplot(death_map, aes(State, fill=Gun.death)) +
  geom_polygon(aes(x,y, group=group), color='white', linewidth=0) + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Gun deaths per\n100k people",
                       limits = c(0,66), breaks = seq(0,60,20), labels = seq(0,60,20),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("county_gun_death_rate_2023.png",width=8.5,height=6)
  
# road death
ggplot(death_map, aes(State, fill=Road.death)) +
  geom_polygon(aes(x,y, group=group), color='white', linewidth=0) + 
  map.theme + map.guide +
  scale_fill_gradientn(name = "Road deaths per\n100k people",
                       limits = c(0,100), breaks = seq(0,100,25), labels = seq(0,100,25),
                       colors = c("#ffffff","#ff8080","#800000","#000000"))
ggsave("county_road_death_rate_2023.png",width=8.5,height=6)