BuildingSync · nllong · Apr 26, 2025 · Apr 25, 2025 · Apr 26, 2025 · Apr 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,9 @@
-# Version 0.1
+# Version 0.2
 
-- initial release. This is currently only released to GitHub.
 - Upgrade to latest version of NMECR (Version 1.0.17)
+- Upgrade to RNOAA 1.4.0
 - Add integration and unit test
-- Move source code out of R directory and into src directory
+
+# Version 0.1
+
+Initial release of the bsyncr package from 2021, which was not previously tagged on GitHub.
diff --git a/R/bsync_utils.R b/R/bsync_utils.R
@@ -1,17 +1,16 @@
 # BuildingSync®, Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
 # See also https://github.com/BuildingSync/bsyncr/blob/main/LICENSE.txt
 
-library('rnoaa');
-library('lubridate');
-
+library("rnoaa")
+library("lubridate")
 # closure for generate_id so we can store static var "count"
 make.f <- function() {
   count <- 0
   f <- function(prefix) {
     count <<- count + 1
     return(sprintf("bsyncr-%s-%d", prefix, count))
   }
-  return( f )
+  return(f)
 }
 
 generate_id <- make.f()
@@ -24,10 +23,11 @@ generate_id <- make.f()
 #' @export
 bs_gen_root_doc <- function(raw_schema_location = "https://raw.githubusercontent.com/BuildingSync/schema/c620c7e58688698901edcb8560cd3e1b4b34d971/BuildingSync.xsd") {
   doc <- xml2::xml_new_root("auc:BuildingSync",
-                      "xmlns:auc" = "http://buildingsync.net/schemas/bedes-auc/2019",
-                      "xsi:schemaLocation" = paste("http://buildingsync.net/schemas/bedes-auc/2019", raw_schema_location),
-                      "xmlns:xsi" = "http://www.w3.org/2001/XMLSchema-instance",
-                      "version" = "dev")
+    "xmlns:auc" = "http://buildingsync.net/schemas/bedes-auc/2019",
+    "xsi:schemaLocation" = paste("http://buildingsync.net/schemas/bedes-auc/2019", raw_schema_location),
+    "xmlns:xsi" = "http://www.w3.org/2001/XMLSchema-instance",
+    "version" = "dev"
+  )
   return(doc)
 }
 
@@ -62,9 +62,9 @@ bs_stub_bldg <- function(doc, bldg_id = "Building-1") {
 #' @return doc An xml_document, with two additional scenarios stubbed out.
 #' @export
 bs_stub_scenarios <- function(doc,
-                                 baseline_id = "Scenario-Baseline",
-                                 reporting_id = "Scenario-Reporting",
-                                 linked_building_id = "Building-1") {
+                              baseline_id = "Scenario-Baseline",
+                              reporting_id = "Scenario-Reporting",
+                              linked_building_id = "Building-1") {
   xml2::xml_find_first(doc, "//auc:Reports") %>%
     xml2::xml_add_child("auc:Report", "ID" = generate_id("Report")) %>%
     xml2::xml_add_child("auc:Scenarios") %>%
@@ -84,8 +84,8 @@ bs_stub_scenarios <- function(doc,
 #' @return x An xml_node for the same auc:Scenario, with addition of the Scenario typing information.
 #' @export
 bs_add_scenario_type <- function(x,
-                                    sc_type = c("Current Building", "Package of Measures"),
-                                    measured = TRUE) {
+                                 sc_type = c("Current Building", "Package of Measures"),
+                                 measured = TRUE) {
   x2 <- x %>%
     xml2::xml_add_child("auc:ScenarioType")
 
@@ -137,9 +137,9 @@ bs_link_bldg <- function(x, linked_building_id) {
 #' @return x An xml_node for the same auc:Scenario, with the addition of the derived model
 #' @export
 bs_stub_derived_model <- function(x,
-                               dm_id,
-                               dm_period = c("Baseline", "Reporting"),
-                               measured_scenario_id = "Scenario-Measured") {
+                                  dm_id,
+                                  dm_period = c("Baseline", "Reporting"),
+                                  measured_scenario_id = "Scenario-Measured") {
   x2 <- x %>% xml2::xml_find_first("auc:ScenarioType/auc:DerivedModel")
 
   x2 %>%
@@ -163,23 +163,23 @@ bs_stub_derived_model <- function(x,
 #'
 #' @return x Data frame for use with nmecr's model_with_* functions
 #' @export
-bs_parse_nmecr_df <- function(tree, insert_weather_data=FALSE) {
+bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
   lat_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Latitude"))
   lng_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Longitude"))
   lat_dbl <- as.double(lat_str)
   lng_dbl <- as.double(lng_str)
   resource_use_id_str <- xml2::xml_attr(xml2::xml_find_first(tree, "//auc:ResourceUses/auc:ResourceUse[auc:EnergyResource = 'Electricity']"), "ID")
 
-  ts_nodes = xml2::xml_find_all(tree, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))
+  ts_nodes <- xml2::xml_find_all(tree, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))
 
   # construct the eload dataframe
-  n_samples = length(ts_nodes)
-  ts_matrix <- matrix(ncol=2, nrow=n_samples)
-  for(i in 1:n_samples){
+  n_samples <- length(ts_nodes)
+  ts_matrix <- matrix(ncol = 2, nrow = n_samples)
+  for (i in 1:n_samples) {
     ts_node <- ts_nodes[i]
     start_timestamp <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:StartTimestamp"))
     reading <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:IntervalReading"))
-    ts_matrix[i,] <- c(start_timestamp, reading)
+    ts_matrix[i, ] <- c(start_timestamp, reading)
   }
   ts_df <- data.frame(ts_matrix)
   colnames(ts_df) <- c("time", "eload")
@@ -188,42 +188,44 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data=FALSE) {
   ts_df[, "eload"] <- as.double(ts_df[, "eload"])
   ts_df[, "time"] <- as.POSIXct(ts_df[, "time"])
 
-  ts_start = min(ts_df$time)
-  ts_end = max(ts_df$time)
+  ts_start <- min(ts_df$time)
+  ts_end <- max(ts_df$time)
 
   # handle weather
   station_data <- rnoaa::ghcnd_stations() # Takes a while to run
-  lat_lon_df <- data.frame(id = c("my_building"),
-                           latitude = c(lat_dbl),
-                           longitude = c(lng_dbl))
+  lat_lon_df <- data.frame(
+    id = c("my_building"),
+    latitude = c(lat_dbl),
+    longitude = c(lng_dbl)
+  )
 
   # get nearest station with average temp data
   nearby_stations <- rnoaa::meteo_nearby_stations(
     lat_lon_df = lat_lon_df,
     station_data = station_data,
-    limit=1,
-    var=c("TAVG")
+    limit = 1,
+    var = c("TAVG")
   )
 
   # get MONTHLY temp data from station
   weather_result <- rnoaa::ncdc(
-    datasetid='GSOM',
-    stationid=sprintf('GHCND:%s', nearby_stations$my_building$id),
-    datatypeid='TAVG',
+    datasetid = "GSOM",
+    stationid = sprintf("GHCND:%s", nearby_stations$my_building$id),
+    datatypeid = "TAVG",
     # messy solution, but ensures that we get data before our start time
-    startdate =  strftime(ts_start - (60 * 60 * 24 * 31) , "%Y-%m-%dT%H:%M:%S"),
+    startdate = strftime(ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
     enddate = ts_end,
-    add_units=TRUE,
+    add_units = TRUE,
   )
 
   weather_data <- weather_result$data
-  temp_matrix <- matrix(ncol=2, nrow=n_samples)
+  temp_matrix <- matrix(ncol = 2, nrow = n_samples)
   for (row in 1:n_samples) {
     # find the weather row with a date closest to our current eload time
     # this is not the correct way to do this, but good enough for now
     # it would seem the nmecr package should do this for us, but it didn't sometimes...
     date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
-    closest_row <- weather_data[which.min(date_diffs),]
+    closest_row <- weather_data[which.min(date_diffs), ]
     row_date <- ts_df[[row, "time"]]
     row_units <- closest_row$units
     if (row_units == "celsius") {
@@ -233,34 +235,36 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data=FALSE) {
     } else {
       stop(sprintf("Invalid unit type: %s", row_units))
     }
-    temp_matrix[row,] <- c(row_date, row_temp)
+    temp_matrix[row, ] <- c(row_date, row_temp)
   }
   temp_df <- data.frame(temp_matrix)
   colnames(temp_df) <- c("time", "temp")
 
   # fix data types
   temp_df[, "temp"] <- as.double(temp_df[, "temp"])
-  temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin=lubridate::origin)
+  temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)
 
   if (insert_weather_data == TRUE) {
     ts_data_elem <- xml2::xml_find_first(tree, '//auc:Scenario[auc:ResourceUses/auc:ResourceUse/auc:EnergyResource/text() = "Electricity"]/auc:TimeSeriesData')
     for (row in 1:n_samples) {
       ts_data_elem %>%
         xml2::xml_add_child("auc:TimeSeries", "ID" = generate_id("TimeSeries")) %>%
-          xml2::xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>%
-          xml2::xml_add_sibling("auc:StartTimestamp", strftime(temp_df[[row, "time"]] , "%Y-%m-%dT%H:%M:%S")) %>%
-          xml2::xml_add_sibling("auc:IntervalFrequency", "Month") %>%
-          xml2::xml_add_sibling("auc:IntervalReading", temp_df[[row, "temp"]])
+        xml2::xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>%
+        xml2::xml_add_sibling("auc:StartTimestamp", strftime(temp_df[[row, "time"]], "%Y-%m-%dT%H:%M:%S")) %>%
+        xml2::xml_add_sibling("auc:IntervalFrequency", "Month") %>%
+        xml2::xml_add_sibling("auc:IntervalReading", temp_df[[row, "temp"]])
     }
   }
 
   data_int <- "Monthly"
-  return(nmecr::create_dataframe(eload_data = ts_df,
-                                  temp_data = temp_df,
-                                  start_date = format(ts_start, format="%m/%d/%y %H:%M"),
-                                  end_date = format(ts_end, format="%m/%d/%y %H:%M"),
-                                  convert_to_data_interval = data_int,
-                                  temp_balancepoint = 65))
+  return(nmecr::create_dataframe(
+    eload_data = ts_df,
+    temp_data = temp_df,
+    start_date = format(ts_start, format = "%m/%d/%y %H:%M"),
+    end_date = format(ts_end, format = "%m/%d/%y %H:%M"),
+    convert_to_data_interval = data_int,
+    temp_balancepoint = 65
+  ))
 }
 
 #' Add inputs, parameters, and performance statistics to a auc:DerivedModel
@@ -277,13 +281,12 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data=FALSE) {
 #' @return x An xml_node
 #' @export
 bs_gen_dm_nmecr <- function(nmecr_baseline_model, x,
-                                 normalization_method = "Forecast",
-                                 response_variable = "Electricity",
-                                 response_variable_units = "kWh",
-                                 response_variable_end_use = "All end uses",
-                                 explanatory_variable_name = "Drybulb Temperature",
-                                 explanatory_variable_units = "Fahrenheit, F") {
-
+                            normalization_method = "Forecast",
+                            response_variable = "Electricity",
+                            response_variable_units = "kWh",
+                            response_variable_end_use = "All end uses",
+                            explanatory_variable_name = "Drybulb Temperature",
+                            explanatory_variable_units = "Fahrenheit, F") {
   # Extract the necessary nodes to manipulate
   dm_start <- xml2::xml_find_first(x, "//auc:Models/auc:Model/auc:StartTimestamp")
   dm_end <- xml2::xml_find_first(x, "//auc:Models/auc:Model/auc:EndTimestamp")
@@ -310,8 +313,8 @@ bs_gen_dm_nmecr <- function(nmecr_baseline_model, x,
   # Extract desired concepts from nmecr model
   model_int <- nmecr_baseline_model$model_input_options$chosen_modeling_interval
   model_type <- nmecr_baseline_model$model_input_options$regression_type
-  model_start_dt <- head(nmecr_baseline_model$training_data$time, n=1)
-  model_end_dt <- tail(nmecr_baseline_model$training_data$time, n=1)
+  model_start_dt <- head(nmecr_baseline_model$training_data$time, n = 1)
+  model_end_dt <- tail(nmecr_baseline_model$training_data$time, n = 1)
 
   # Map above to correct BSync representation
   bsync_interval <- nmecr_to_bsync[[model_int]]
@@ -347,51 +350,54 @@ bs_gen_dm_nmecr <- function(nmecr_baseline_model, x,
   bsync_beta2 <- NULL
   bsync_beta3 <- NULL
   # TODO: find a better way of catching cases where we failed to fit the model
-  tryCatch({
-    if (bsync_model_type == "2 parameter simple linear regression") {
-      bsync_intercept <- coeffs[["(Intercept)"]]
-      bsync_beta1 <- coeffs[["temp"]]
-    } else if (bsync_model_type == "3 parameter heating change point model" || bsync_model_type == "3 parameter cooling change point model") {
-      bsync_intercept <- coeffs[["(Intercept)"]]
-      bsync_beta1 <- coeffs[["U1.independent_variable"]]
-      # psi[2] contains the estimated change point
-      bsync_beta2 <- nmecr_baseline_model$model$psi[2]
-
-      # for current nmecr implementation, the sign for beta 1 and 2 is flipped for
-      # the heating models, which we account for here
-      if (grepl('heating', bsync_model_type)) {
-        bsync_beta1 <- -1 * bsync_beta1
-        bsync_beta2 <- -1 * bsync_beta2
+  tryCatch(
+    {
+      if (bsync_model_type == "2 parameter simple linear regression") {
+        bsync_intercept <- coeffs[["(Intercept)"]]
+        bsync_beta1 <- coeffs[["temp"]]
+      } else if (bsync_model_type == "3 parameter heating change point model" || bsync_model_type == "3 parameter cooling change point model") {
+        bsync_intercept <- coeffs[["(Intercept)"]]
+        bsync_beta1 <- coeffs[["U1.independent_variable"]]
+        # psi[2] contains the estimated change point
+        bsync_beta2 <- nmecr_baseline_model$model$psi[2]
+
+        # for current nmecr implementation, the sign for beta 1 and 2 is flipped for
+        # the heating models, which we account for here
+        if (grepl("heating", bsync_model_type)) {
+          bsync_beta1 <- -1 * bsync_beta1
+          bsync_beta2 <- -1 * bsync_beta2
+        }
+      } else if (bsync_model_type == "4 parameter change point model") {
+        # to get the intercept `C` according to ASHRAE Guideline 14-2014, Figure D-1
+        # we must predict the eload at the estimated temperature change point
+        temp_change_point <- nmecr_baseline_model$model$psi[2]
+        predictions <- calculate_model_predictions(
+          training_data = nmecr_baseline_model$training_data,
+          prediction_data = as.data.frame(list(time = c(2019 - 01 - 01), temp = c(temp_change_point))),
+          modeled_object = nmecr_baseline_model
+        )
+        bsync_intercept <- predictions$predictions[1]
+        bsync_beta1 <- coeffs[["independent_variable"]]
+
+        # TODO: verify this is _always_ the wrong sign
+        # flip the sign b/c current nmecr implementation has it incorrectly set
+        bsync_beta2 <- -1 * coeffs[["U1.independent_variable"]]
+
+        bsync_beta3 <- temp_change_point
+      } else {
+        stop("Unhandled model type")
       }
-    } else if (bsync_model_type == "4 parameter change point model") {
-      # to get the intercept `C` according to ASHRAE Guideline 14-2014, Figure D-1
-      # we must predict the eload at the estimated temperature change point
-      temp_change_point <- nmecr_baseline_model$model$psi[2]
-      predictions <- calculate_model_predictions(
-        training_data=nmecr_baseline_model$training_data,
-        prediction_data=as.data.frame(list(time=c(2019-01-01), temp=c(temp_change_point))),
-        modeled_object=nmecr_baseline_model
-      )
-      bsync_intercept <- predictions$predictions[1]
-      bsync_beta1 <- coeffs[["independent_variable"]]
-
-      # TODO: verify this is _always_ the wrong sign
-      # flip the sign b/c current nmecr implementation has it incorrectly set
-      bsync_beta2 <- -1 * coeffs[["U1.independent_variable"]]
-
-      bsync_beta3 <- temp_change_point
-    } else {
-      stop("Unhandled model type")
-    }
-  }, error = function(e) {
-    print(e)
-    # if we get a subscript out of bounds error, assume it's b/c we failed
-    # to fit the model and as a result we would be missing values inside of our result
-    if (e$message == "subscript out of bounds") {
-      stop('Failed to parse model for BuildingSync. This is most likely because the model failed to fit')
+    },
+    error = function(e) {
+      print(e)
+      # if we get a subscript out of bounds error, assume it's b/c we failed
+      # to fit the model and as a result we would be missing values inside of our result
+      if (e$message == "subscript out of bounds") {
+        stop("Failed to parse model for BuildingSync. This is most likely because the model failed to fit")
+      }
+      stop(e$message)
     }
-    stop(e$message)
-  })
+  )
 
   dm_params <- dm_coeff %>% xml2::xml_add_child("auc:Guideline14Model")
   dm_params %>% xml2::xml_add_child("auc:ModelType", bsync_model_type)
@@ -418,8 +424,8 @@ bs_gen_dm_nmecr <- function(nmecr_baseline_model, x,
   dm_perf %>%
     xml2::xml_add_child("auc:RSquared", format(perf[["R_squared"]], scientific = FALSE)) %>%
     xml2::xml_add_sibling("auc:CVRMSE", format(max(perf[["CVRMSE %"]], 0), scientific = FALSE)) %>%
-    xml2::xml_add_sibling("auc:NDBE", format(round(max(as.numeric(perf[["NDBE %"]]), 0), 2), scientific = FALSE, nsmall=2)) %>%
-    xml2::xml_add_sibling("auc:NMBE", format(round(max(as.numeric(perf[["NMBE %"]]), 0), 2), scientific = FALSE, nsmall=2))
+    xml2::xml_add_sibling("auc:NDBE", format(round(max(as.numeric(perf[["NDBE %"]]), 0), 2), scientific = FALSE, nsmall = 2)) %>%
+    xml2::xml_add_sibling("auc:NMBE", format(round(max(as.numeric(perf[["NMBE %"]]), 0), 2), scientific = FALSE, nsmall = 2))
 
   return(x)
 }