BuildingSync · kflemin · May 9, 2025 · May 9, 2025 · May 9, 2025 · May 9, 2025
diff --git a/.lintr b/.lintr
@@ -0,0 +1,4 @@
+# Ensure all lines have proper tags and remove unmatched parentheses
+linters: with_defaults(
+  line_length_linter = line_length_linter(120)
+)
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,4 @@
 # Generated by roxygen2: do not edit by hand
-
 export(bs_add_scenario_type)
 export(bs_gen_dm_nmecr)
 export(bs_gen_root_doc)

diff --git a/R/bsync_utils.R b/R/bsync_utils.R
@@ -3,6 +3,10 @@
 
 library("rnoaa")
 library("lubridate")
+
+# source file in same directory for weather_data class
+# source("weather_data_fetcher.R")
+
 # closure for generate_id so we can store static var "count"
 make.f <- function() {
   count <- 0
@@ -159,11 +163,12 @@ bs_stub_derived_model <- function(x,
 #' Generate a data frame usable by nmecr for modeling
 #'
 #' @param tree XML tree to parse
-
+#' @param insert_weather_data Boolean indicating whether to insert weather data into the XML tree
 #'
 #' @return x Data frame for use with nmecr's model_with_* functions
 #' @export
 bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
+  # Extract latitude and longitude from the XML tree
   lat_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Latitude"))
   lng_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Longitude"))
   lat_dbl <- as.double(lat_str)
@@ -172,7 +177,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
 
   ts_nodes <- xml2::xml_find_all(tree, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))
 
-  # construct the eload dataframe
+  # Construct the eload dataframe
   n_samples <- length(ts_nodes)
   ts_matrix <- matrix(ncol = 2, nrow = n_samples)
   for (i in 1:n_samples) {
@@ -184,46 +189,26 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
   ts_df <- data.frame(ts_matrix)
   colnames(ts_df) <- c("time", "eload")
 
-  # fix data types
+  # Fix data types
   ts_df[, "eload"] <- as.double(ts_df[, "eload"])
   ts_df[, "time"] <- as.POSIXct(ts_df[, "time"])
 
   ts_start <- min(ts_df$time)
   ts_end <- max(ts_df$time)
 
-  # handle weather
-  station_data <- rnoaa::ghcnd_stations() # Takes a while to run
-  lat_lon_df <- data.frame(
-    id = c("my_building"),
-    latitude = c(lat_dbl),
-    longitude = c(lng_dbl)
-  )
-
-  # get nearest station with average temp data
-  nearby_stations <- rnoaa::meteo_nearby_stations(
-    lat_lon_df = lat_lon_df,
-    station_data = station_data,
-    limit = 1,
-    var = c("TAVG")
-  )
-
-  # get MONTHLY temp data from station
-  weather_result <- rnoaa::ncdc(
-    datasetid = "GSOM",
-    stationid = sprintf("GHCND:%s", nearby_stations$my_building$id),
-    datatypeid = "TAVG",
-    # messy solution, but ensures that we get data before our start time
-    startdate = strftime(ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
-    enddate = ts_end,
-    add_units = TRUE,
+  # Use the weather_data_fetcher class to get weather data
+  fetcher <- weather_data_fetcher$new(
+    station_id = NULL,
+    ts_start = ts_start,
+    ts_end = ts_end
   )
+  fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl)
+  weather_data <- fetcher$get_weather_data()
 
-  weather_data <- weather_result$data
+  # Match weather data to time series data
   temp_matrix <- matrix(ncol = 2, nrow = n_samples)
   for (row in 1:n_samples) {
-    # find the weather row with a date closest to our current eload time
-    # this is not the correct way to do this, but good enough for now
-    # it would seem the nmecr package should do this for us, but it didn't sometimes...
+    # Find the weather row with a date closest to our current eload time
     date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
     closest_row <- weather_data[which.min(date_diffs), ]
     row_date <- ts_df[[row, "time"]]
@@ -240,7 +225,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
   temp_df <- data.frame(temp_matrix)
   colnames(temp_df) <- c("time", "temp")
 
-  # fix data types
+  # Fix data types
   temp_df[, "temp"] <- as.double(temp_df[, "temp"])
   temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)
 

diff --git a/R/weather_data_fetcher.R b/R/weather_data_fetcher.R
@@ -0,0 +1,89 @@
+# BuildingSync®, Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
+# See also https://github.com/BuildingSync/bsyncr/blob/main/LICENSE.txt
+
+library(R6)
+library(rnoaa)
+
+weather_data_fetcher <- R6Class(
+  "weather_data_fetcher",
+  public = list(
+    station_id = NULL,
+    datatype_id = "TAVG", # Temperature Average
+    ts_start = NULL,
+    ts_end = NULL,
+    station_data = NULL,
+    weather_data = NULL,
+    initialize = function(station_id, ts_start, ts_end) {
+      self$station_id <- station_id
+      self$ts_start <- ts_start
+      self$ts_end <- ts_end
+
+      # Takes a while to run - but should be cached when initializing
+      # the this class for the first time.
+      self$station_data <- rnoaa::ghcnd_stations()
+    },
+    get_nearest_station = function(lat, long) {
+      lat_lon_df <- data.frame(
+        id = c("my_building"),
+        latitude = c(lat),
+        longitude = c(long)
+      )
+      nearby_stations <- rnoaa::meteo_nearby_stations(
+        lat_lon_df = lat_lon_df,
+        station_data = self$station_data,
+        limit = 1,
+        var = c("TAVG")
+      )
+      self$station_id <- nearby_stations$my_building$id
+    },
+    get_weather_data = function() {
+      # check if station_id is NULL
+      if (is.null(self$station_id)) {
+        stop("Station ID is NULL. Please set the station ID first or call get_nearest_station()")
+      }
+
+      # get MONTHLY temp data from station
+      weather_result <- rnoaa::ncdc(
+        datasetid = "GSOM",
+        stationid = sprintf("GHCND:%s", self$station_id),
+        datatypeid = "TAVG",
+        # messy solution, but ensures that we get data before our start time
+        startdate = strftime(self$ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
+        enddate = self$ts_end,
+        add_units = TRUE
+      )
+
+      self$weather_data <- weather_result$data
+      return(self$weather_data)
+    },
+    to_df = function(n_samples = 12) {
+      # convert the weather data to a data frame
+
+      # check if weather_data is NULL
+      if (is.null(self$weather_data)) {
+        stop("Weather data is NULL. Please call get_weather_data() first.")
+      }
+      # check if weather_data is empty
+      if (length(self$weather_data) == 0) {
+        stop("Weather data is empty. Please check the station ID and date range.")
+      }
+      # check if weather_data has the expected structure
+      if (!all(c("date", "value", "units") %in% names(self$weather_data))) {
+        stop("Weather data does not have the expected structure.")
+      }
+
+      cat(paste("Weather data:", self$weather_data, "\n"))
+      temp_df <- data.frame(
+        date = self$weather_data$date,
+        temp = self$weather_data$value,
+        units = self$weather_data$units
+      )
+
+      # fix data types?
+      # temp_df[, "temp"] <- as.double(temp_df[, "temp"])
+      # temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)
+
+      return(temp_df)
+    }
+  )
+)
diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ Rscript -e "testthat::test_dir('tests')"
 - In RStudio, format all the R files by running the following commands in RStudio
 
 ```R
-install.packages("styler")
+install.packages("styler", repos = "http://cran.us.r-project.org")
 styler::style_dir()
 ```
 

diff --git a/cspell.json b/cspell.json
@@ -11,23 +11,34 @@
     "corymosiman",
     "crul",
     "dataframe",
+    "datasetid",
     "datatypeid",
     "dplyr",
     "Drybulb",
     "elec",
+    "eload",
+    "enddate",
     "geonames",
     "ggplot",
+    "GHCND",
+    "GSOM",
     "hoardr",
     "isdparser",
     "lubridate",
     "macintoshpie",
+    "meteo",
+    "ncdc",
     "nllong",
     "NMEC",
     "nmecr",
     "NOAA",
     "noaakey",
+    "nolint",
     "rappdirs",
     "rnoaa",
+    "startdate",
+    "stationid",
+    "TAVG",
     "testthat",
     "tidyr"
   ],

diff --git a/tests/bsyncr_example.Rmd b/tests/bsyncr_example.Rmd
@@ -26,6 +26,7 @@ library(ggplot2)
 
 
 # Source the utility functions
+source("./R/weather_data_fetcher.R")
 source("./R/bsync_utils.R")
 
 # get root path
@@ -42,7 +43,7 @@ options(noaakey = NOAA_TOKEN)
 ```{r}
 # Path to the test file
 # bsync path from root path
-bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync.xml")
+bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync_2.xml")
 
 
 baseline_scenario_id <- "Scenario-bsyncr"
@@ -57,6 +58,85 @@ not_used <- sc_baseline %>% bsyncr::bs_stub_derived_model(
   dm_period = "Baseline"
 )
 
+lat_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Latitude"))
+lng_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Longitude"))
+lat_dbl <- as.double(lat_str)
+lng_dbl <- as.double(lng_str)
+resource_use_id_str <- xml2::xml_attr(xml2::xml_find_first(bsync_doc, "//auc:ResourceUses/auc:ResourceUse[auc:EnergyResource = 'Electricity']"), "ID")
+
+ts_nodes <- xml2::xml_find_all(bsync_doc, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))
+
+n_samples <- length(ts_nodes)
+ts_matrix <- matrix(ncol = 2, nrow = n_samples)
+for (i in 1:n_samples) {
+  ts_node <- ts_nodes[i]
+  start_timestamp <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:StartTimestamp"))
+  reading <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:IntervalReading"))
+  ts_matrix[i, ] <- c(start_timestamp, reading)
+}
+ts_df <- data.frame(ts_matrix)
+colnames(ts_df) <- c("time", "eload")
+
+# fix data types
+ts_df[, "eload"] <- as.double(ts_df[, "eload"])
+ts_df[, "time"] <- as.POSIXct(ts_df[, "time"])
+
+ts_start <- min(ts_df$time)
+ts_end <- max(ts_df$time)
+
+# Use the weather_data_fetcher class to get weather data
+fetcher <- weather_data_fetcher$new(
+  station_id = NULL,
+  ts_start = ts_start,
+  ts_end = ts_end
+)
+fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl)
+weather_data <- fetcher$get_weather_data()
+
+temp_matrix <- matrix(ncol = 2, nrow = n_samples)
+for (row in 1:n_samples) {
+  # find the weather row with a date closest to our current eload time
+  # this is not the correct way to do this, but good enough for now
+  # it would seem the nmecr package should do this for us, but it didn't sometimes...
+  date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
+  closest_row <- weather_data[which.min(date_diffs), ]
+  row_date <- ts_df[[row, "time"]]
+  row_units <- closest_row$units
+  if (row_units == "celsius") {
+    row_temp <- (closest_row$value * 9 / 5) + 32
+  } else if (row_units == "fahrenheit") {
+    row_temp <- closest_row$value
+  } else {
+    stop(sprintf("Invalid unit type: %s", row_units))
+  }
+  temp_matrix[row, ] <- c(row_date, row_temp)
+}
+temp_df <- data.frame(temp_matrix)
+colnames(temp_df) <- c("time", "temp")
+
+# fix data types
+temp_df[, "temp"] <- as.double(temp_df[, "temp"])
+temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)
+
+ts_data_elem <- xml2::xml_find_first(bsync_doc, '//auc:Scenario[auc:ResourceUses/auc:ResourceUse/auc:EnergyResource/text() = "Electricity"]/auc:TimeSeriesData')
+for (row in 1:n_samples) {
+  ts_data_elem %>%
+    xml2::xml_add_child("auc:TimeSeries", "ID" = generate_id("TimeSeries")) %>%
+    xml2::xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>%
+    xml2::xml_add_sibling("auc:StartTimestamp", strftime(temp_df[[row, "time"]], "%Y-%m-%dT%H:%M:%S")) %>%
+    xml2::xml_add_sibling("auc:IntervalFrequency", "Month") %>%
+    xml2::xml_add_sibling("auc:IntervalReading", temp_df[[row, "temp"]])
+}
+
+data_int <- "Monthly"
+new_df <- nmecr::create_dataframe(
+  eload_data = ts_df,
+  temp_data = temp_df,
+  start_date = format(ts_start, format = "%m/%d/%y"),
+  end_date = format(ts_end, format = "%m/%d/%y"),
+  convert_to_data_interval = data_int,
+  temp_balancepoint = 65
+)
 b_df <- bsyncr::bs_parse_nmecr_df(bsync_doc, insert_weather_data = TRUE)
 ```
 
@@ -74,7 +154,8 @@ print(model_df)
 intercept <- model$model$coefficients[["(Intercept)"]] / 3.41214 # btu to kwh
 # Model is in °C, so convert to F.
 slope <- model$model$coefficients[["temp"]] * 9 / 5 # °C to °F
-
+print(slope)
+print(intercept)
 ggplot2::ggplot(model_df, aes(x = temp, y = value)) +
   geom_point(aes(color = variable), data = model_df[model_df$variable == "eload", ]) +
   geom_line(aes(color = variable), data = model_df[model_df$variable == "model_fit", ]) +