# ENTSO-E day 2 data pre-preparation ----
#
# Final state of data/day2_prepreparation/prepreparation.R after the patch
# series "ENTSO day2 data prepreparation suggestion" (1/3), "small
# adjustments" (2/3) and "Prepreparation (minor adjustments)" (3/3).
#
# Input:  day2_data_energy_prod_EU_2020-08-03_2020-08-09.csv, read from the
#         working directory. Columns used: ActualGenerationOutput,
#         InstalledGenCapacity, DateTime, MapCode, PowerSystemResourceName,
#         ProductionTypeName.
# Output: nothing is written to disk; the script builds the objects below
#         and opens them with View().
#   df                              cleaned records plus DATE, COUNTRY,
#                                   entries_this_day
#   df_grouped   / df_grouped_IC    daily sums per plant, one column per day
#   df_grouped_1 / df_grouped_1_IC  daily means per plant
#   df_aggr      / df_aggr_IC       daily means summed per country
#   df_type      / df_type_IC       daily means summed per country and type

library(dplyr)
library(tidyr)
# NOTE(review): nothing in this script appears to use RCurl; kept in case
# code outside this script relies on it being attached.
library(RCurl)

# Initial df ----
df <- read.csv("day2_data_energy_prod_EU_2020-08-03_2020-08-09.csv")
old_count <- nrow(df)

# Remove unrealistic outliers ----
# A record is an outlier when its output exceeds four times the installed
# capacity. The comparison is NA when either value is missing, and indexing
# with an NA logical would replace such a record by an all-NA phantom row, so
# those records are dropped here explicitly. `difference` therefore counts
# outliers plus records with a missing output or capacity.
is_outlier <- df$ActualGenerationOutput > df$InstalledGenCapacity * 4
df <- df[!is.na(is_outlier) & !is_outlier, ]
new_count <- nrow(df)
difference <- old_count - new_count

# Remove almost empty rows (10 or more missing fields) ----
df <- df[rowSums(is.na(df)) < 10, ]
new_count_1 <- nrow(df)
difference_1 <- new_count - new_count_1

# Date column ----
# The first 10 characters of DateTime are parsed as a "%Y-%m-%d" day.
df$DATE <- as.POSIXct(substr(df$DateTime, 1, 10), format = "%Y-%m-%d")

# DE & Northern Ireland adjustments ----
# as.character() keeps the assignments below valid even if MapCode was read
# as a factor.
df$COUNTRY <- as.character(df$MapCode)
df$COUNTRY[grepl("DE_", df$MapCode, fixed = TRUE)] <- "DE" # uniform DE
df$COUNTRY <- gsub("NIE", "GB", df$COUNTRY) # Northern Ireland to GB

# Number of entries per plant and day ----
df <- df %>%
  group_by(PowerSystemResourceName, DATE) %>%
  mutate(entries_this_day = n()) %>%
  ungroup()
View(df) # Entries per day differ, e.g. Hungary: 96, GB: 48, Spain: 24

# Delete the 2020-08-09 records: this day is not fully recorded, there is
# always only one entry (00:00). The string is coerced to POSIXct by the
# comparison.
df <- df %>%
  filter(DATE != "2020-08-09")


# Generation ----

# Sum up daily energy generation per plant; one column per day, days without
# any record are filled with 0. names_sort = TRUE keeps the day columns in
# chronological order, which the `2020-08-03`:`2020-08-08` ranges below
# rely on.
df_grouped <- df %>%
  group_by(
    PowerSystemResourceName, DATE, COUNTRY, ProductionTypeName,
    entries_this_day
  ) %>%
  summarise(sum_generation = sum(ActualGenerationOutput), .groups = "drop") %>%
  pivot_wider(
    names_from = DATE,
    values_from = sum_generation,
    values_fill = 0,
    names_sort = TRUE
  )
View(df_grouped)

# Divide each daily sum by the daily entry count to get the mean output of
# that day (W = J/s).
df_grouped_1 <- df_grouped %>%
  mutate(across(starts_with("2020"), ~ .x / entries_this_day))
View(df_grouped_1)

# Aggregate per country, sort by the first day and add the six-day average.
df_aggr <- df_grouped_1 %>%
  group_by(COUNTRY) %>%
  summarise(across(`2020-08-03`:`2020-08-08`, sum)) %>%
  arrange(desc(`2020-08-03`)) %>%
  mutate(
    AVERAGE_GENERATION = (`2020-08-03` + `2020-08-04` + `2020-08-05` +
      `2020-08-06` + `2020-08-07` + `2020-08-08`) / 6
  )
View(df_aggr)
# -> We could use this df as a basis for our targeted spatial country plot
#    (day 5, slide 7)

# Aggregate per production type per country.
df_type <- df_grouped_1 %>%
  group_by(COUNTRY, ProductionTypeName) %>%
  summarise(across(`2020-08-03`:`2020-08-08`, sum), .groups = "drop") %>%
  arrange(desc(COUNTRY))
View(df_type)


# EXTRA: Same for Installed Capacity ----

# Sum up daily installed capacity per plant (same layout as df_grouped).
df_grouped_IC <- df %>%
  group_by(
    PowerSystemResourceName, DATE, COUNTRY, ProductionTypeName,
    entries_this_day
  ) %>%
  summarise(sum_capacity = sum(InstalledGenCapacity), .groups = "drop") %>%
  pivot_wider(
    names_from = DATE,
    values_from = sum_capacity,
    values_fill = 0,
    names_sort = TRUE
  )
View(df_grouped_IC)

# Divide each daily sum by the daily entry count (W = J/s).
df_grouped_1_IC <- df_grouped_IC %>%
  mutate(across(starts_with("2020"), ~ .x / entries_this_day))
View(df_grouped_1_IC)
# -> Installed Capacity for single plants fits, e.g. Isar 2 1410 MW, see
#    https://de.wikipedia.org/wiki/Kernkraftwerk_Isar

# Aggregate per country, sort by the first day and add the six-day average.
df_aggr_IC <- df_grouped_1_IC %>%
  group_by(COUNTRY) %>%
  summarise(across(`2020-08-03`:`2020-08-08`, sum)) %>%
  arrange(desc(`2020-08-03`)) %>%
  mutate(
    AVERAGE_IC = (`2020-08-03` + `2020-08-04` + `2020-08-05` +
      `2020-08-06` + `2020-08-07` + `2020-08-08`) / 6
  )
View(df_aggr_IC)
# -> It seems like there is quite a lot missing, e.g. DE,
#    https://www.smard.de/page/en/wiki-article/5884/6038 says 232,000 MW
#    (Nov 23) vs. 73,500 MW here

# Aggregate per production type per country.
# The sort step previously piped from df_type (generation), which overwrote
# the installed-capacity table; it now stays on df_type_IC.
df_type_IC <- df_grouped_1_IC %>%
  group_by(COUNTRY, ProductionTypeName) %>%
  summarise(across(`2020-08-03`:`2020-08-08`, sum), .groups = "drop") %>%
  arrange(desc(COUNTRY))
View(df_type_IC)
# -> Wind, Photovoltaics, Biomass are missing completely and other numbers
#    like Fossil Gas don't match at all
# NOTE(review): that observation was written while df_type_IC still held the
# generation table; re-check it against the corrected table.