From 740ffea1399899aebe76e240d21ba1a16dc8d9f0 Mon Sep 17 00:00:00 2001
From: Geo-99 <georgstarz@web.de>
Date: Mon, 27 Nov 2023 20:39:38 +0100
Subject: [PATCH 1/3] ENTSO day2 data prepreparation suggestion

---
 data/day2_prepreparation/prepreparation.R | 99 +++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 data/day2_prepreparation/prepreparation.R

diff --git a/data/day2_prepreparation/prepreparation.R b/data/day2_prepreparation/prepreparation.R
new file mode 100644
index 0000000..8252d11
--- /dev/null
+++ b/data/day2_prepreparation/prepreparation.R
@@ -0,0 +1,99 @@
+library(dplyr)
+library(tidyr)
+library(RCurl)
+
+# Initial df
+df <- read.csv("day2_data_energy_prod_EU_2020-08-03_2020-08-09.csv")
+old_count <- nrow(df)
+
+# remove unrealistic outliers (> 4x capacity); which() drops NA comparisons instead of turning them into all-NA rows
+df <- df[which(!(df$ActualGenerationOutput > df$InstalledGenCapacity * 4)), ]
+new_count <- nrow(df)
+difference <- old_count - new_count
+
+# remove almost empty rows
+df <- df[rowSums(is.na(df))<10, ]
+new_count_1 <- nrow(df)
+difference_1 <- new_count - new_count_1
+
+# date col
+df$DATE <- substr(df$DateTime,1,10)
+df$DATE <- as.POSIXct(df$DATE, format = "%Y-%m-%d")
+
+# DE & Northern Ireland adjustments
+df$COUNTRY <- df$MapCode
+df$COUNTRY[grep("DE_", df$MapCode)] <- "DE" # uniform DE
+df$COUNTRY <- gsub("NIE", "GB", df$COUNTRY) # Northern Ireland to GB
+
+# Add column that adds up daily entries per plant
+df <- df %>%
+  group_by(PowerSystemResourceName, DATE) %>%
+  mutate(entries_this_day = n())
+df <- ungroup(df)
+View(df) # Entries per day different! Hungary: 96, GB: 48, Spain: 24
+
+# Delete 2020-08-09 lines because this day is not fully recorded as there is always only one entry (00:00)
+df <- df %>% 
+  filter(DATE != "2020-08-09")
+
+
+
+
+# Sum up daily Energy generation
+df_grouped <- df %>% 
+  group_by(PowerSystemResourceName, DATE, COUNTRY, ProductionTypeName, entries_this_day) %>% 
+  summarise(sum_generation = sum(ActualGenerationOutput)) %>% 
+  spread(DATE, sum_generation, fill = 0)
+View(df_grouped)
+
+# Divide the daily sum by the number of entries that day, giving the mean output (W = J/s)
+df_grouped_1 <- df_grouped %>%
+  mutate(across(starts_with("2020"), ~./entries_this_day, .names = "{.col}"))
+View(df_grouped_1)
+
+# Aggregate per country & sort
+df_aggr <- df_grouped_1[,!colnames(df_grouped_1) %in% "ProductionTypeName"] %>% 
+  group_by(COUNTRY) %>% 
+  summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
+df_aggr <- df_aggr %>% arrange(desc(`2020-08-03`))
+df_aggr <- ungroup(df_aggr)
+View(df_aggr) # We could use this df as a basis for our targeted spatial country plot (day 5, slide 7)
+
+# Aggregate per production type per country
+df_type <- df_grouped_1 %>% 
+  group_by(COUNTRY, ProductionTypeName) %>% 
+  summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
+df_type <- df_type %>% arrange(desc(COUNTRY))
+df_type <- ungroup(df_type)
+View(df_type)
+
+
+
+
+# Sum up daily Installed Capacity
+df_grouped_IC <- df %>% 
+  group_by(PowerSystemResourceName, DATE, COUNTRY, ProductionTypeName, entries_this_day) %>% 
+  summarise(sum_generation = sum(InstalledGenCapacity)) %>% 
+  spread(DATE, sum_generation, fill = 0)
+View(df_grouped_IC)
+
+# Divide the daily sum by the number of entries that day, giving the mean installed capacity (W = J/s)
+df_grouped_1_IC <- df_grouped_IC %>%
+  mutate(across(starts_with("2020"), ~./entries_this_day, .names = "{.col}"))
+View(df_grouped_1_IC) # Installed Capacity for single plants fits, e.g. Isar 2 1410 MW, see https://de.wikipedia.org/wiki/Kernkraftwerk_Isar
+
+# Aggregate per country & sort
+df_aggr_IC <- df_grouped_1_IC[,!colnames(df_grouped_1_IC) %in% "ProductionTypeName"] %>% 
+  group_by(COUNTRY) %>% 
+  summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
+df_aggr_IC <- df_aggr_IC %>% arrange(desc(`2020-08-03`))
+df_aggr_IC <- ungroup(df_aggr_IC)
+View(df_aggr_IC) # It seems like there is quite a lot missing, e.g. DE, https://www.smard.de/page/en/wiki-article/5884/6038 says 232,000 MW (Nov 23) vs. 73,500 MW here
+
+# Aggregate per production type per country
+df_type_IC <- df_grouped_1_IC %>% 
+  group_by(COUNTRY, ProductionTypeName) %>% 
+  summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
+# Sort the installed-capacity table itself; piping `df_type` here would overwrite it with the generation table
+df_type_IC <- df_type_IC %>% arrange(desc(COUNTRY)) %>% ungroup()
+View(df_type_IC) # NOTE(review): the earlier remark that Wind, Photovoltaics, Biomass are missing completely and other numbers like Fossil Gas don't match at all was made while this table held the generation values of df_type - re-check

From d33603e39462f894932e6a8c46c410b5b540853a Mon Sep 17 00:00:00 2001
From: Geo-99 <georgstarz@web.de>
Date: Mon, 27 Nov 2023 22:27:57 +0100
Subject: [PATCH 2/3] small adjustments

---
 data/day2_prepreparation/prepreparation.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/data/day2_prepreparation/prepreparation.R b/data/day2_prepreparation/prepreparation.R
index 8252d11..caf1b3d 100644
--- a/data/day2_prepreparation/prepreparation.R
+++ b/data/day2_prepreparation/prepreparation.R
@@ -57,7 +57,10 @@ df_aggr <- df_grouped_1[,!colnames(df_grouped_1) %in% "ProductionTypeName"] %>%
   summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
 df_aggr <- df_aggr %>% arrange(desc(`2020-08-03`))
 df_aggr <- ungroup(df_aggr)
-View(df_aggr) # We could use this df as a basis for our targeted spatial country plot (day 5, slide 7)
+df_aggr <- df_aggr %>% 
+  mutate(AVERAGE_GENERATION = (`2020-08-03` + `2020-08-04` + `2020-08-05` + `2020-08-06` + `2020-08-07` +`2020-08-08`) / 6)
+View(df_aggr)
+# We could use this df as a basis for our targeted spatial country plot (day 5, slide 7)
 
 # Aggregate per production type per country
 df_type <- df_grouped_1 %>% 

From f2e89dff02e2fdcfa7c5d9673019c867e12584e9 Mon Sep 17 00:00:00 2001
From: Geo-99 <georgstarz@web.de>
Date: Mon, 27 Nov 2023 22:54:39 +0100
Subject: [PATCH 3/3] Prepreparation (minor adjustments)

---
 data/day2_prepreparation/prepreparation.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/data/day2_prepreparation/prepreparation.R b/data/day2_prepreparation/prepreparation.R
index caf1b3d..0b66c3b 100644
--- a/data/day2_prepreparation/prepreparation.R
+++ b/data/day2_prepreparation/prepreparation.R
@@ -60,7 +60,7 @@ df_aggr <- ungroup(df_aggr)
 df_aggr <- df_aggr %>% 
   mutate(AVERAGE_GENERATION = (`2020-08-03` + `2020-08-04` + `2020-08-05` + `2020-08-06` + `2020-08-07` +`2020-08-08`) / 6)
 View(df_aggr)
-# We could use this df as a basis for our targeted spatial country plot (day 5, slide 7)
+# -> We could use this df as a basis for our targeted spatial country plot (day 5, slide 7)
 
 # Aggregate per production type per country
 df_type <- df_grouped_1 %>% 
@@ -72,6 +72,7 @@ View(df_type)
 
 
 
+# EXTRA: Same for Installed Capacity
 
 # Sum up daily Installed Capacity
 df_grouped_IC <- df %>% 
@@ -91,7 +92,10 @@ df_aggr_IC <- df_grouped_1_IC[,!colnames(df_grouped_1_IC) %in% "ProductionTypeNa
   summarise(across(`2020-08-03`:`2020-08-08`, sum, .names = "{.col}"))
 df_aggr_IC <- df_aggr_IC %>% arrange(desc(`2020-08-03`))
 df_aggr_IC <- ungroup(df_aggr_IC)
-View(df_aggr_IC) # It seems like there is quite a lot missing, e.g. DE, https://www.smard.de/page/en/wiki-article/5884/6038 says 232,000 MW (Nov 23) vs. 73,500 MW here
+df_aggr_IC <- df_aggr_IC %>% 
+  mutate(AVERAGE_IC = (`2020-08-03` + `2020-08-04` + `2020-08-05` + `2020-08-06` + `2020-08-07` +`2020-08-08`) / 6)
+View(df_aggr_IC)
+# -> It seems like there is quite a lot missing, e.g. DE, https://www.smard.de/page/en/wiki-article/5884/6038 says 232,000 MW (Nov 23) vs. 73,500 MW here
 
 # Aggregate per production type per country
 df_type_IC <- df_grouped_1_IC %>%