diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..5927d24 --- /dev/null +++ b/.lintr @@ -0,0 +1,4 @@ +# Ensure all lines have proper tags and remove unmatched parentheses +linters: with_defaults( + line_length_linter = line_length_linter(120) +) diff --git a/NAMESPACE b/NAMESPACE index ea711ed..5d0ae00 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,4 @@ # Generated by roxygen2: do not edit by hand - export(bs_add_scenario_type) export(bs_gen_dm_nmecr) export(bs_gen_root_doc) diff --git a/R/bsync_utils.R b/R/bsync_utils.R index 103be0c..6dcfb2c 100644 --- a/R/bsync_utils.R +++ b/R/bsync_utils.R @@ -3,6 +3,10 @@ library("rnoaa") library("lubridate") + +# source file in same directory for weather_data class +# source("weather_data_fetcher.R") + # closure for generate_id so we can store static var "count" make.f <- function() { count <- 0 @@ -159,11 +163,12 @@ bs_stub_derived_model <- function(x, #' Generate a data frame usable by nmecr for modeling #' #' @param tree XML tree to parse - +#' @param insert_weather_data Boolean indicating whether to insert weather data into the XML tree #' #' @return x Data frame for use with nmecr's model_with_* functions #' @export bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) { + # Extract latitude and longitude from the XML tree lat_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Latitude")) lng_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Longitude")) lat_dbl <- as.double(lat_str) @@ -172,7 +177,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) { ts_nodes <- xml2::xml_find_all(tree, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str)) - # construct the eload dataframe + # Construct the eload dataframe n_samples <- length(ts_nodes) ts_matrix <- matrix(ncol = 2, nrow = n_samples) for (i in 1:n_samples) { @@ -184,46 +189,26 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) { ts_df <- data.frame(ts_matrix) colnames(ts_df) <- c("time", "eload") - # fix data types + # Fix data types ts_df[, "eload"] <- as.double(ts_df[, "eload"]) ts_df[, "time"] <- as.POSIXct(ts_df[, "time"]) ts_start <- min(ts_df$time) ts_end <- max(ts_df$time) - # handle weather - station_data <- rnoaa::ghcnd_stations() # Takes a while to run - lat_lon_df <- data.frame( - id = c("my_building"), - latitude = c(lat_dbl), - longitude = c(lng_dbl) - ) - - # get nearest station with average temp data - nearby_stations <- rnoaa::meteo_nearby_stations( - lat_lon_df = lat_lon_df, - station_data = station_data, - limit = 1, - var = c("TAVG") - ) - - # get MONTHLY temp data from station - weather_result <- rnoaa::ncdc( - datasetid = "GSOM", - stationid = sprintf("GHCND:%s", nearby_stations$my_building$id), - datatypeid = "TAVG", - # messy solution, but ensures that we get data before our start time - startdate = strftime(ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"), - enddate = ts_end, - add_units = TRUE, + # Use the weather_data_fetcher class to get weather data + fetcher <- weather_data_fetcher$new( + station_id = NULL, + ts_start = ts_start, + ts_end = ts_end ) + fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl) + weather_data <- fetcher$get_weather_data() - weather_data <- weather_result$data + # Match weather data to time series data temp_matrix <- matrix(ncol = 2, nrow = n_samples) for (row in 1:n_samples) { - # find the weather row with a date closest to our current eload time - # this is not the correct way to do this, but good 
enough for now
-    # it would seem the nmecr package should do this for us, but it didn't sometimes...
+    # Find the weather row with a date closest to our current eload time
     date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
     closest_row <- weather_data[which.min(date_diffs), ]
     row_date <- ts_df[[row, "time"]]
@@ -240,7 +225,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
   temp_df <- data.frame(temp_matrix)
   colnames(temp_df) <- c("time", "temp")
 
-  # fix data types
+  # Fix data types
   temp_df[, "temp"] <- as.double(temp_df[, "temp"])
   temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)
 
diff --git a/R/weather_data_fetcher.R b/R/weather_data_fetcher.R
new file mode 100644
index 0000000..228bc74
--- /dev/null
+++ b/R/weather_data_fetcher.R
@@ -0,0 +1,89 @@
+# BuildingSync®, Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
+# See also https://github.com/BuildingSync/bsyncr/blob/main/LICENSE.txt
+
+library(R6)
+library(rnoaa)
+
+weather_data_fetcher <- R6Class(
+  "weather_data_fetcher",
+  public = list(
+    station_id = NULL,
+    datatype_id = "TAVG", # Temperature Average
+    ts_start = NULL,
+    ts_end = NULL,
+    station_data = NULL,
+    weather_data = NULL,
+    initialize = function(station_id, ts_start, ts_end) {
+      self$station_id <- station_id
+      self$ts_start <- ts_start
+      self$ts_end <- ts_end
+
+      # Takes a while to run, but the result is stored on the instance
+      # when this class is initialized for the first time.
+      self$station_data <- rnoaa::ghcnd_stations()
+    },
+    get_nearest_station = function(lat, long) {
+      lat_lon_df <- data.frame(
+        id = c("my_building"),
+        latitude = c(lat),
+        longitude = c(long)
+      )
+      nearby_stations <- rnoaa::meteo_nearby_stations(
+        lat_lon_df = lat_lon_df,
+        station_data = self$station_data,
+        limit = 1,
+        var = c("TAVG")
+      )
+      self$station_id <- nearby_stations$my_building$id
+    },
+    get_weather_data = function() {
+      # check if station_id is NULL
+      if (is.null(self$station_id)) {
+        stop("Station ID is NULL. Please set the station ID first or call get_nearest_station()")
+      }
+
+      # get MONTHLY temp data from station
+      weather_result <- rnoaa::ncdc(
+        datasetid = "GSOM",
+        stationid = sprintf("GHCND:%s", self$station_id),
+        datatypeid = "TAVG",
+        # messy solution, but ensures that we get data before our start time
+        startdate = strftime(self$ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
+        enddate = self$ts_end,
+        add_units = TRUE
+      )
+
+      self$weather_data <- weather_result$data
+      return(self$weather_data)
+    },
+    to_df = function(n_samples = 12) {
+      # convert the weather data to a data frame
+
+      # check if weather_data is NULL
+      if (is.null(self$weather_data)) {
+        stop("Weather data is NULL. Please call get_weather_data() first.")
+      }
+      # check if weather_data is empty
+      if (length(self$weather_data) == 0) {
+        stop("Weather data is empty. Please check the station ID and date range.")
+      }
+      # check if weather_data has the expected structure
+      if (!all(c("date", "value", "units") %in% names(self$weather_data))) {
+        stop("Weather data does not have the expected structure.")
+      }
+
+      cat(sprintf("Weather data: %s rows\n", nrow(self$weather_data)))
+      temp_df <- data.frame(
+        date = self$weather_data$date,
+        temp = self$weather_data$value,
+        units = self$weather_data$units
+      )
+
+      # fix data types?
+ # temp_df[, "temp"] <- as.double(temp_df[, "temp"]) + # temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin) + + return(temp_df) + } + ) +) diff --git a/README.md b/README.md index 626b004..7712c14 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Rscript -e "testthat::test_dir('tests')" - In RStudio, format all the R files by running the following commands in RStudio ```R -install.packages("styler") +install.packages("styler", repos = "http://cran.us.r-project.org") styler::style_dir() ``` diff --git a/cspell.json b/cspell.json index 323b77f..84e2a7a 100644 --- a/cspell.json +++ b/cspell.json @@ -11,23 +11,34 @@ "corymosiman", "crul", "dataframe", + "datasetid", "datatypeid", "dplyr", "Drybulb", "elec", + "eload", + "enddate", "geonames", "ggplot", + "GHCND", + "GSOM", "hoardr", "isdparser", "lubridate", "macintoshpie", + "meteo", + "ncdc", "nllong", "NMEC", "nmecr", "NOAA", "noaakey", + "nolint", "rappdirs", "rnoaa", + "startdate", + "stationid", + "TAVG", "testthat", "tidyr" ], diff --git a/tests/bsyncr_example.Rmd b/tests/bsyncr_example.Rmd index 6a70b1d..5a819c8 100644 --- a/tests/bsyncr_example.Rmd +++ b/tests/bsyncr_example.Rmd @@ -26,6 +26,7 @@ library(ggplot2) # Source the utility functions +source("./R/weather_data_fetcher.R") source("./R/bsync_utils.R") # get root path @@ -42,7 +43,7 @@ options(noaakey = NOAA_TOKEN) ```{r} # Path to the test file # bsync path from root path -bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync.xml") +bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync_2.xml") baseline_scenario_id <- "Scenario-bsyncr" @@ -57,6 +58,85 @@ not_used <- sc_baseline %>% bsyncr::bs_stub_derived_model( dm_period = "Baseline" ) +lat_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Latitude")) +lng_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Longitude")) +lat_dbl <- as.double(lat_str) +lng_dbl <- as.double(lng_str) +resource_use_id_str <- xml2::xml_attr(xml2::xml_find_first(bsync_doc, "//auc:ResourceUses/auc:ResourceUse[auc:EnergyResource = 'Electricity']"), "ID") + +ts_nodes <- xml2::xml_find_all(bsync_doc, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str)) + +n_samples <- length(ts_nodes) +ts_matrix <- matrix(ncol = 2, nrow = n_samples) +for (i in 1:n_samples) { + ts_node <- ts_nodes[i] + start_timestamp <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:StartTimestamp")) + reading <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:IntervalReading")) + ts_matrix[i, ] <- c(start_timestamp, reading) +} +ts_df <- data.frame(ts_matrix) +colnames(ts_df) <- c("time", "eload") + +# fix data types +ts_df[, "eload"] <- as.double(ts_df[, "eload"]) +ts_df[, "time"] <- as.POSIXct(ts_df[, "time"]) + +ts_start <- min(ts_df$time) +ts_end <- max(ts_df$time) + +# Use the weather_data_fetcher class to get weather data +fetcher <- weather_data_fetcher$new( + station_id = NULL, + ts_start = ts_start, + ts_end = ts_end +) +fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl) +weather_data <- fetcher$get_weather_data() + +temp_matrix <- matrix(ncol = 2, nrow = n_samples) +for (row in 1:n_samples) { + # find the weather row with a date closest to our current eload time + # this is not the correct way to do this, but good enough for now + # it would seem the nmecr package should do this for us, but it didn't sometimes... 
+ date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]]) + closest_row <- weather_data[which.min(date_diffs), ] + row_date <- ts_df[[row, "time"]] + row_units <- closest_row$units + if (row_units == "celsius") { + row_temp <- (closest_row$value * 9 / 5) + 32 + } else if (row_units == "fahrenheit") { + row_temp <- closest_row$value + } else { + stop(sprintf("Invalid unit type: %s", row_units)) + } + temp_matrix[row, ] <- c(row_date, row_temp) +} +temp_df <- data.frame(temp_matrix) +colnames(temp_df) <- c("time", "temp") + +# fix data types +temp_df[, "temp"] <- as.double(temp_df[, "temp"]) +temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin) + +ts_data_elem <- xml2::xml_find_first(bsync_doc, '//auc:Scenario[auc:ResourceUses/auc:ResourceUse/auc:EnergyResource/text() = "Electricity"]/auc:TimeSeriesData') +for (row in 1:n_samples) { + ts_data_elem %>% + xml2::xml_add_child("auc:TimeSeries", "ID" = generate_id("TimeSeries")) %>% + xml2::xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>% + xml2::xml_add_sibling("auc:StartTimestamp", strftime(temp_df[[row, "time"]], "%Y-%m-%dT%H:%M:%S")) %>% + xml2::xml_add_sibling("auc:IntervalFrequency", "Month") %>% + xml2::xml_add_sibling("auc:IntervalReading", temp_df[[row, "temp"]]) +} + +data_int <- "Monthly" +new_df <- nmecr::create_dataframe( + eload_data = ts_df, + temp_data = temp_df, + start_date = format(ts_start, format = "%m/%d/%y"), + end_date = format(ts_end, format = "%m/%d/%y"), + convert_to_data_interval = data_int, + temp_balancepoint = 65 +) b_df <- bsyncr::bs_parse_nmecr_df(bsync_doc, insert_weather_data = TRUE) ``` @@ -74,7 +154,8 @@ print(model_df) intercept <- model$model$coefficients[["(Intercept)"]] / 3.41214 # btu to kwh # Model is in °C, so convert to F. 
slope <- model$model$coefficients[["temp"]] * 9 / 5 # °C to °F - +print(slope) +print(intercept) ggplot2::ggplot(model_df, aes(x = temp, y = value)) + geom_point(aes(color = variable), data = model_df[model_df$variable == "eload", ]) + geom_line(aes(color = variable), data = model_df[model_df$variable == "model_fit", ]) + diff --git a/tests/data/ex_bsync_2.xml b/tests/data/ex_bsync_2.xml new file mode 100644 index 0000000..e40c517 --- /dev/null +++ b/tests/data/ex_bsync_2.xml @@ -0,0 +1,113 @@ + + + + + + + + My-Building + -91.27998 + 36.82129 + + + + + + + + + + + + + + + + + + Electricity + kWh + All end uses + + + + + 2022-06-01T07:00:00+00:00 + 2022-07-01T07:00:00+00:00 + 33979.9339 + + + + 2022-07-01T07:00:00+00:00 + 2022-08-01T07:00:00+00:00 + 37348.9798 + + + + 2022-08-01T07:00:00+00:00 + 2022-09-01T07:00:00+00:00 + 36235.5125 + + + + 2022-09-01T07:00:00+00:00 + 2022-10-01T07:00:00+00:00 + 26497.2686 + + + + 2022-10-01T07:00:00+00:00 + 2022-11-01T07:00:00+00:00 + 11398.8116 + + + + 2022-11-01T07:00:00+00:00 + 2022-12-01T07:00:00+00:00 + 7873.5536 + + + + 2022-12-01T08:00:00+00:00 + 2023-01-01T08:00:00+00:00 + 6094.658899999999 + + + + 2023-01-01T08:00:00+00:00 + 2023-02-01T08:00:00+00:00 + 3143.8154000000004 + + + + 2023-02-01T08:00:00+00:00 + 2023-03-01T08:00:00+00:00 + 5724.8785 + + + + 2023-03-01T08:00:00+00:00 + 2023-04-01T08:00:00+00:00 + 9382.5127 + + + + 2023-04-01T07:00:00+00:00 + 2023-05-01T07:00:00+00:00 + 11834.166299999999 + + + + 2023-05-01T07:00:00+00:00 + 2023-06-01T07:00:00+00:00 + 23704.444499999998 + + + + + + + + + + diff --git a/tests/test-bsyncr.R b/tests/test_bsyncr.R similarity index 78% rename from tests/test-bsyncr.R rename to tests/test_bsyncr.R index cf922f4..7e0a79c 100644 --- a/tests/test-bsyncr.R +++ b/tests/test_bsyncr.R @@ -45,3 +45,17 @@ test_that("Dataframe is created successfully", { expect_true(!is.null(result), "Resulting dataframe should not be NULL") expect_gt(nrow(result), 0, "Resulting dataframe should have rows") }) + + +test_that("Dataframe for file 2 is created successfully", { + # Path to the test file + bsync_filepath <- "./data/ex_bsync_2.xml" + + # Run the function and check the result + result <- test_create_dataframe(bsync_filepath) + print(result) + + # Ensure the result is not NULL and has rows + expect_true(!is.null(result), "Resulting dataframe should not be NULL") + expect_gt(nrow(result), 0, "Resulting dataframe should have rows") +}) diff --git a/tests/test_weather_data_fetcher.R b/tests/test_weather_data_fetcher.R new file mode 100644 index 0000000..770c30f --- /dev/null +++ b/tests/test_weather_data_fetcher.R @@ -0,0 +1,80 @@ +# BuildingSync®, Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors. 
+# See also https://github.com/BuildingSync/bsyncr/blob/main/LICENSE.txt + +library(testthat) +library(rnoaa) + +source("../R/weather_data_fetcher.R") + +# Ensure NOAA token is set +NOAA_TOKEN <- Sys.getenv("NOAA_TOKEN") +if (NOAA_TOKEN == "") { + stop("Missing NOAA token env var: NOAA_TOKEN") +} +options(noaakey = NOAA_TOKEN) + + +test_that("get_nearest_station correctly identifies the nearest station", { + # Mock data for testing - NYC + mock_lat <- 40.7128 + mock_long <- -74.0060 + + # Create an instance of the weather_data_fetcher class + # ts_start and ts_end are not used in this test + fetcher <- weather_data_fetcher$new( + station_id = NULL, + ts_start = Sys.time() - (60 * 60 * 24 * 30), + ts_end = Sys.time() + ) + + fetcher$get_nearest_station(lat = mock_lat, long = mock_long) + cat(paste(" Nearest station ID:", fetcher$station_id, "\n")) + + # Check that the station_id is not NULL and is the value for central park NYC + expect_false(is.null(fetcher$station_id)) + expect_true(fetcher$station_id == "USW00094728") +}) + +# test valid error when station_id is NULL +test_that("get_weather_data throws error when station_id is NULL", { + # Create an instance of the weather_data_fetcher class + fetcher <- weather_data_fetcher$new( + station_id = NULL, + ts_start = Sys.time() - (60 * 60 * 24 * 30), + ts_end = Sys.time() + ) + + # Expect an error when calling get_weather_data with a NULL station_id + expect_error(fetcher$get_weather_data(), "Station ID is NULL. Please set the station ID first or call get_nearest_station()") +}) + +# now test getting the weather data for the same station as above +test_that("get_weather_data fetches data correctly", { + # Mock data for testing - NYC + mock_lat <- 40.7128 + mock_long <- -74.0060 + + # Create an instance of the weather_data_fetcher class + fetcher <- weather_data_fetcher$new( + station_id = "USW00094728", + ts_start = as.POSIXct("2022-06-01"), + ts_end = as.POSIXct("2023-07-01") + ) + + # Fetch the weather data + weather_data <- fetcher$get_weather_data() + + # Check that the data is not NULL and has the expected structure + expect_false(is.null(fetcher$weather_data)) + expect_true(nrow(fetcher$weather_data) > 0) + + df_weather_data <- fetcher$to_df() + print(df_weather_data) + + # check the structure of the data frame + expect_true(all(c("date", "temp", "units") %in% names(df_weather_data))) + expect_true(nrow(df_weather_data) > 0) + expect_true(ncol(df_weather_data) == 3) + expect_true(all(df_weather_data$temp > 0)) + expect_true(all(df_weather_data$units == "celsius")) +})
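For reference, a minimal usage sketch of the new `weather_data_fetcher` class from `R/weather_data_fetcher.R`. It assumes a valid NOAA API token in the `NOAA_TOKEN` environment variable (as in the tests), network access, and that it is run from the repository root; the coordinates and date range below are illustrative only.

```r
library(R6)
library(rnoaa)

# Load the class definition added in this change
source("R/weather_data_fetcher.R")

# rnoaa::ncdc() requires a NOAA API token
options(noaakey = Sys.getenv("NOAA_TOKEN"))

# Illustrative monthly baseline period and coordinates (New York City)
fetcher <- weather_data_fetcher$new(
  station_id = NULL,
  ts_start = as.POSIXct("2022-06-01"),
  ts_end = as.POSIXct("2023-06-01")
)

# Resolve the nearest GHCND station with TAVG data, then pull
# monthly (GSOM) average-temperature observations for the period
fetcher$get_nearest_station(lat = 40.7128, long = -74.0060)
weather_data <- fetcher$get_weather_data()

# Flatten to a date/temp/units data frame
temp_df <- fetcher$to_df()
head(temp_df)
```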
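The matching step used in both `bs_parse_nmecr_df()` and `tests/bsyncr_example.Rmd` pairs each eload timestamp with the GSOM reading whose date is nearest, converting Celsius readings to Fahrenheit. Below is a self-contained sketch of just that step; the weather rows and timestamps are made up so it runs without NOAA access, but they are shaped like the `fetcher$get_weather_data()` output.

```r
# Illustrative monthly weather rows (values made up); real rows come from rnoaa::ncdc()
weather_data <- data.frame(
  date = c("2022-06-01", "2022-07-01", "2022-08-01"),
  value = c(21.5, 24.0, 23.2), # monthly average temperature in degrees Celsius
  units = c("celsius", "celsius", "celsius"),
  stringsAsFactors = FALSE
)

# Illustrative eload timestamps from the parsed BuildingSync time series
eload_times <- as.POSIXct(c("2022-06-02", "2022-07-03", "2022-08-01"), tz = "UTC")

weather_dates <- as.POSIXct(weather_data$date, tz = "UTC")
temps <- numeric(length(eload_times))
for (i in seq_along(eload_times)) {
  # Pick the weather row whose date is nearest to this eload timestamp
  closest <- weather_data[which.min(abs(weather_dates - eload_times[i])), ]
  if (closest$units == "celsius") {
    temps[i] <- (closest$value * 9 / 5) + 32 # convert to Fahrenheit
  } else if (closest$units == "fahrenheit") {
    temps[i] <- closest$value
  } else {
    stop(sprintf("Invalid unit type: %s", closest$units))
  }
}

temp_df <- data.frame(time = eload_times, temp = temps)
temp_df
```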
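The example notebook writes the matched temperatures back into the BuildingSync document as `auc:TimeSeries` elements through chained `xml2::xml_add_child()` / `xml2::xml_add_sibling()` calls. A minimal sketch of that pattern on a stand-alone document is shown below; the namespace URI, the literal ID (the real code uses the `generate_id()` helper), and the element values are illustrative assumptions.

```r
library(xml2)
library(magrittr)

# Stand-alone document using the auc namespace prefix (URI is assumed here;
# in practice it comes from the BuildingSync file being edited)
doc <- read_xml('<auc:TimeSeriesData xmlns:auc="http://buildingsync.net/schemas/bedes-auc/2019"/>')
ts_data <- xml_root(doc)

# xml_add_child() returns the new node, so each xml_add_sibling() call
# appends the next element after the previously created one
ts_data %>%
  xml_add_child("auc:TimeSeries", "ID" = "TimeSeries-Example-1") %>%
  xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>%
  xml_add_sibling("auc:StartTimestamp", "2022-06-01T00:00:00") %>%
  xml_add_sibling("auc:IntervalFrequency", "Month") %>%
  xml_add_sibling("auc:IntervalReading", "72.5")

cat(as.character(doc))
```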
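One note on the `expect_error()` assertions in `tests/test_weather_data_fetcher.R`: the expected message is interpreted as a regular expression, so the periods and the empty `()` at the end are treated as metacharacters (the assertion still passes because `.` matches any character and an empty group matches the empty string). A sketch of the literal-text form, using a hypothetical `fail_like_fetcher()` stand-in for the real call:

```r
library(testthat)

fail_like_fetcher <- function() {
  # Hypothetical stand-in raising the same message as get_weather_data()
  stop("Station ID is NULL. Please set the station ID first or call get_nearest_station()")
}

# fixed = TRUE matches the message as literal text rather than as a regex
expect_error(
  fail_like_fetcher(),
  "Station ID is NULL. Please set the station ID first or call get_nearest_station()",
  fixed = TRUE
)
```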