library(readr)

# Raw GitHub location of the TidyTuesday (2023-06-06) OWID energy CSV
url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-06-06/owid-energy.csv"

# Download the file and parse it into a tibble
energy_data <- read_csv(file = url)

Question: How does recent economic development impact a region’s energy mix (renewables vs. non-renewables)?


Introduction:

The dataset from Our World in Data’s Energy Data Explorer provides an extensive overview of global energy consumption, delineating the mix between renewable and non-renewable energy sources. In exploring the interplay between economic growth and energy mix over the last two decades, we will examine key columns such as country and year for geographical and temporal context, population and gdp for economic scaling, and electricity production by source (solar, wind, hydro, other renewables, coal, oil, gas, and nuclear), which we aggregate into renewable and non-renewable totals.

For this analysis, the focus is on unpacking the influence of economic growth on the composition of regional energy mixes over the past two decades. This analysis will entail comparing renewable and non-renewable energy sources, examining trends, and identifying shifts in this period.


Approach:

This study analyzes how economic development affects regional energy mixes. We compare renewable and non-renewable electricity totals across countries and relate them to economic indicators (GDP, population). K-means clustering groups observations by their renewable vs. non-renewable profile, and scatter plots faceted by year (2001, 2009, 2017) show how these profiles change over time. Additionally, scatter plots will explore the link between GDP and renewable energy use, with data points sized by population to put each country's output in the context of its size. These visualizations aim to reveal regional trends and potential outliers for further investigation.


Analysis:

# Preview the first six rows to check column names and types
head(energy_data, n = 6L)
## # A tibble: 6 × 129
##   country      year iso_code population   gdp biofuel_cons_change_pct
##   <chr>       <dbl> <chr>         <dbl> <dbl>                   <dbl>
## 1 Afghanistan  1900 AFG         4832414    NA                      NA
## 2 Afghanistan  1901 AFG         4879685    NA                      NA
## 3 Afghanistan  1902 AFG         4935122    NA                      NA
## 4 Afghanistan  1903 AFG         4998861    NA                      NA
## 5 Afghanistan  1904 AFG         5063419    NA                      NA
## 6 Afghanistan  1905 AFG         5128808    NA                      NA
## # ℹ 123 more variables: biofuel_cons_change_twh <dbl>,
## #   biofuel_cons_per_capita <dbl>, biofuel_consumption <dbl>,
## #   biofuel_elec_per_capita <dbl>, biofuel_electricity <dbl>,
## #   biofuel_share_elec <dbl>, biofuel_share_energy <dbl>,
## #   carbon_intensity_elec <dbl>, coal_cons_change_pct <dbl>,
## #   coal_cons_change_twh <dbl>, coal_cons_per_capita <dbl>,
## #   coal_consumption <dbl>, coal_elec_per_capita <dbl>, …
library(ggplot2)
library(dplyr)

# Calculate total renewable and non-renewable electricity per row.
# `+` propagates NA, so a total is NA whenever any of its source columns is
# missing for that row.
energy_data <- energy_data %>%
  mutate(
    total_renewable = solar_electricity + wind_electricity +
      hydro_electricity + other_renewable_electricity,
    total_non_renewable = coal_electricity + oil_electricity +
      gas_electricity + nuclear_electricity,
    total_electricity = total_renewable + total_non_renewable
  ) %>%
  # Drop incomplete rows only with respect to the variables this analysis
  # uses (the two totals for clustering, gdp and population for the plots).
  # A bare na.omit() would discard every row that has an NA in ANY column,
  # including columns that are never touched here.
  # NOTE(review): rows for aggregate regions, if the data contains any, are
  # not excluded by this filter -- confirm whether they should be removed
  # before clustering.
  filter(
    !is.na(total_renewable),
    !is.na(total_non_renewable),
    !is.na(gdp),
    !is.na(population)
  )

# Normalize data: centre and scale both totals so that neither variable
# dominates the Euclidean distances used by k-means
scaled_data <- scale(energy_data[, c("total_renewable", "total_non_renewable")])

# K-means clustering into three groups
set.seed(123)  # For reproducibility
# nstart = 25 tries several random initialisations and keeps the best
# solution; the default single start can settle on a poor local optimum.
clusters <- kmeans(scaled_data, centers = 3, nstart = 25)

# Store the assignment as a factor so ggplot treats it as a discrete colour
energy_data$cluster <- as.factor(clusters$cluster)

# Scatter plot of the clustering: one point per row of energy_data, coloured
# by its k-means cluster. The data is not restricted to a single year, so the
# title must not claim "Most Recent Year".
ggplot(
  energy_data,
  aes(x = total_non_renewable, y = total_renewable, color = cluster)
) +
  geom_point(alpha = 0.6) +
  labs(
    x = "Total Non-Renewable Electricity",
    y = "Total Renewable Electricity",
    color = "Cluster",
    title = "Clustering of Countries by Renewable vs. Non-Renewable Energy Profiles (All Years)"
  )

# Keep only rows that have both a GDP value and a renewable total, and make
# sure the cluster label is a factor for discrete colouring
energy_data <- energy_data %>%
  filter(!(is.na(gdp) | is.na(total_renewable))) %>%
  mutate(cluster = as.factor(cluster))

# Scatter plot of GDP vs. Total Renewable Electricity, coloured by cluster.
# The x-axis label follows the dataset's data dictionary, which gives gdp in
# international-$ at 2011 prices (not constant 2010 USD).
ggplot(energy_data, aes(x = gdp, y = total_renewable, color = cluster)) +
  geom_point(alpha = 0.6) +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "GDP (international-$, 2011 prices)",
    y = "Total Renewable Electricity (TWh)",
    color = "Cluster",
    title = "GDP vs. Renewable Energy Consumption by Cluster"
  )

# Scatter plot of GDP vs. Total Renewable Electricity but accounting for
# population size: point area is mapped to population.
# The x-axis label follows the dataset's data dictionary, which gives gdp in
# international-$ at 2011 prices (not constant 2010 USD).
ggplot(
  energy_data,
  aes(x = gdp, y = total_renewable, color = cluster, size = population)
) +
  geom_point(alpha = 0.6) +
  scale_size(range = c(1, 10)) +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "GDP (international-$, 2011 prices)",
    y = "Total Renewable Electricity (TWh)",
    color = "Cluster",
    size = "Population",
    title = "GDP vs. Renewable Energy Consumption by Cluster with Population Sizes"
  )

# Restrict to three snapshot years and turn year into a factor so each
# snapshot gets its own facet panel
time_series_data <- energy_data %>%
  filter(year %in% c(2001, 2009, 2017)) %>%
  mutate(year = as.factor(year))

# Faceted plot: GDP vs. renewable electricity, one panel per snapshot year.
# The x-axis label carries the GDP unit given in the dataset's data
# dictionary (international-$ at 2011 prices).
time_series_plot <- ggplot(
  time_series_data,
  aes(x = gdp, y = total_renewable, color = cluster, size = population)
) +
  geom_point(alpha = 0.7) +
  facet_wrap(~ year) +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "GDP (international-$, 2011 prices)",
    y = "Total Renewable Electricity (TWh)",
    color = "Cluster",
    size = "Population",
    title = "GDP vs. Renewable Energy Consumption by Cluster over Time"
  )

print(time_series_plot)


Discussion: