Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .lintr
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ensure all lines have proper tags and remove unmatched parentheses
linters: with_defaults(
line_length_linter = line_length_linter(120)
)
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(bs_add_scenario_type)
export(bs_gen_dm_nmecr)
export(bs_gen_root_doc)
Expand Down
51 changes: 18 additions & 33 deletions R/bsync_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

library("rnoaa")
library("lubridate")

# source file in same directory for weather_data class
# source("weather_data_fetcher.R")

# closure for generate_id so we can store static var "count"
make.f <- function() {
count <- 0
Expand Down Expand Up @@ -159,11 +163,12 @@ bs_stub_derived_model <- function(x,
#' Generate a data frame usable by nmecr for modeling
#'
#' @param tree XML tree to parse

#' @param insert_weather_data Boolean indicating whether to insert weather data into the XML tree
#'
#' @return x Data frame for use with nmecr's model_with_* functions
#' @export
bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
# Extract latitude and longitude from the XML tree
lat_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Latitude"))
lng_str <- xml2::xml_text(xml2::xml_find_first(tree, "//auc:Building/auc:Longitude"))
lat_dbl <- as.double(lat_str)
Expand All @@ -172,7 +177,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {

ts_nodes <- xml2::xml_find_all(tree, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))

# construct the eload dataframe
# Construct the eload dataframe
n_samples <- length(ts_nodes)
ts_matrix <- matrix(ncol = 2, nrow = n_samples)
for (i in 1:n_samples) {
Expand All @@ -184,46 +189,26 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
ts_df <- data.frame(ts_matrix)
colnames(ts_df) <- c("time", "eload")

# fix data types
# Fix data types
ts_df[, "eload"] <- as.double(ts_df[, "eload"])
ts_df[, "time"] <- as.POSIXct(ts_df[, "time"])

ts_start <- min(ts_df$time)
ts_end <- max(ts_df$time)

# handle weather
station_data <- rnoaa::ghcnd_stations() # Takes a while to run
lat_lon_df <- data.frame(
id = c("my_building"),
latitude = c(lat_dbl),
longitude = c(lng_dbl)
)

# get nearest station with average temp data
nearby_stations <- rnoaa::meteo_nearby_stations(
lat_lon_df = lat_lon_df,
station_data = station_data,
limit = 1,
var = c("TAVG")
)

# get MONTHLY temp data from station
weather_result <- rnoaa::ncdc(
datasetid = "GSOM",
stationid = sprintf("GHCND:%s", nearby_stations$my_building$id),
datatypeid = "TAVG",
# messy solution, but ensures that we get data before our start time
startdate = strftime(ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
enddate = ts_end,
add_units = TRUE,
# Use the weather_data_fetcher class to get weather data
fetcher <- weather_data_fetcher$new(
station_id = NULL,
ts_start = ts_start,
ts_end = ts_end
)
fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl)
weather_data <- fetcher$get_weather_data()

weather_data <- weather_result$data
# Match weather data to time series data
temp_matrix <- matrix(ncol = 2, nrow = n_samples)
for (row in 1:n_samples) {
# find the weather row with a date closest to our current eload time
# this is not the correct way to do this, but good enough for now
# it would seem the nmecr package should do this for us, but it didn't sometimes...
# Find the weather row with a date closest to our current eload time
date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
closest_row <- weather_data[which.min(date_diffs), ]
row_date <- ts_df[[row, "time"]]
Expand All @@ -240,7 +225,7 @@ bs_parse_nmecr_df <- function(tree, insert_weather_data = FALSE) {
temp_df <- data.frame(temp_matrix)
colnames(temp_df) <- c("time", "temp")

# fix data types
# Fix data types
temp_df[, "temp"] <- as.double(temp_df[, "temp"])
temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)

Expand Down
89 changes: 89 additions & 0 deletions R/weather_data_fetcher.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# BuildingSync®, Copyright (c) Alliance for Sustainable Energy, LLC, and other contributors.
# See also https://github.com/BuildingSync/bsyncr/blob/main/LICENSE.txt

library(R6)
library(rnoaa)

weather_data_fetcher <- R6Class(
"weather_data_fetcher",
public = list(
station_id = NULL,
datatype_id = "TAVG", # Temperature Average
ts_start = NULL,
ts_end = NULL,
station_data = NULL,
weather_data = NULL,
initialize = function(station_id, ts_start, ts_end) {
self$station_id <- station_id
self$ts_start <- ts_start
self$ts_end <- ts_end

# Takes a while to run - but should be cached when initializing
# the this class for the first time.
self$station_data <- rnoaa::ghcnd_stations()
},
get_nearest_station = function(lat, long) {
lat_lon_df <- data.frame(
id = c("my_building"),
latitude = c(lat),
longitude = c(long)
)
nearby_stations <- rnoaa::meteo_nearby_stations(
lat_lon_df = lat_lon_df,
station_data = self$station_data,
limit = 1,
var = c("TAVG")
)
self$station_id <- nearby_stations$my_building$id
},
get_weather_data = function() {
# check if station_id is NULL
if (is.null(self$station_id)) {
stop("Station ID is NULL. Please set the station ID first or call get_nearest_station()")
}

# get MONTHLY temp data from station
weather_result <- rnoaa::ncdc(
datasetid = "GSOM",
stationid = sprintf("GHCND:%s", self$station_id),
datatypeid = "TAVG",
# messy solution, but ensures that we get data before our start time
startdate = strftime(self$ts_start - (60 * 60 * 24 * 31), "%Y-%m-%dT%H:%M:%S"),
enddate = self$ts_end,
add_units = TRUE
)

self$weather_data <- weather_result$data
return(self$weather_data)
},
to_df = function(n_samples = 12) {
# convert the weather data to a data frame

# check if weather_data is NULL
if (is.null(self$weather_data)) {
stop("Weather data is NULL. Please call get_weather_data() first.")
}
# check if weather_data is empty
if (length(self$weather_data) == 0) {
stop("Weather data is empty. Please check the station ID and date range.")
}
# check if weather_data has the expected structure
if (!all(c("date", "value", "units") %in% names(self$weather_data))) {
stop("Weather data does not have the expected structure.")
}

cat(paste("Weather data:", self$weather_data, "\n"))
temp_df <- data.frame(
date = self$weather_data$date,
temp = self$weather_data$value,
units = self$weather_data$units
)

# fix data types?
# temp_df[, "temp"] <- as.double(temp_df[, "temp"])
# temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)

return(temp_df)
}
)
)
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Rscript -e "testthat::test_dir('tests')"
- In RStudio, format all the R files by running the following commands in RStudio

```R
install.packages("styler")
install.packages("styler", repos = "http://cran.us.r-project.org")
styler::style_dir()
```

Expand Down
11 changes: 11 additions & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,34 @@
"corymosiman",
"crul",
"dataframe",
"datasetid",
"datatypeid",
"dplyr",
"Drybulb",
"elec",
"eload",
"enddate",
"geonames",
"ggplot",
"GHCND",
"GSOM",
"hoardr",
"isdparser",
"lubridate",
"macintoshpie",
"meteo",
"ncdc",
"nllong",
"NMEC",
"nmecr",
"NOAA",
"noaakey",
"nolint",
"rappdirs",
"rnoaa",
"startdate",
"stationid",
"TAVG",
"testthat",
"tidyr"
],
Expand Down
85 changes: 83 additions & 2 deletions tests/bsyncr_example.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ library(ggplot2)


# Source the utility functions
source("./R/weather_data_fetcher.R")
source("./R/bsync_utils.R")

# get root path
Expand All @@ -42,7 +43,7 @@ options(noaakey = NOAA_TOKEN)
```{r}
# Path to the test file
# bsync path from root path
bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync.xml")
bsync_filepath <- file.path(root_path, "tests", "data", "ex_bsync_2.xml")


baseline_scenario_id <- "Scenario-bsyncr"
Expand All @@ -57,6 +58,85 @@ not_used <- sc_baseline %>% bsyncr::bs_stub_derived_model(
dm_period = "Baseline"
)

lat_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Latitude"))
lng_str <- xml2::xml_text(xml2::xml_find_first(bsync_doc, "//auc:Building/auc:Longitude"))
lat_dbl <- as.double(lat_str)
lng_dbl <- as.double(lng_str)
resource_use_id_str <- xml2::xml_attr(xml2::xml_find_first(bsync_doc, "//auc:ResourceUses/auc:ResourceUse[auc:EnergyResource = 'Electricity']"), "ID")

ts_nodes <- xml2::xml_find_all(bsync_doc, sprintf("//auc:TimeSeries[auc:ResourceUseID/@IDref = '%s']", resource_use_id_str))

n_samples <- length(ts_nodes)
ts_matrix <- matrix(ncol = 2, nrow = n_samples)
for (i in 1:n_samples) {
ts_node <- ts_nodes[i]
start_timestamp <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:StartTimestamp"))
reading <- xml2::xml_text(xml2::xml_find_first(ts_node, "auc:IntervalReading"))
ts_matrix[i, ] <- c(start_timestamp, reading)
}
ts_df <- data.frame(ts_matrix)
colnames(ts_df) <- c("time", "eload")

# fix data types
ts_df[, "eload"] <- as.double(ts_df[, "eload"])
ts_df[, "time"] <- as.POSIXct(ts_df[, "time"])

ts_start <- min(ts_df$time)
ts_end <- max(ts_df$time)

# Use the weather_data_fetcher class to get weather data
fetcher <- weather_data_fetcher$new(
station_id = NULL,
ts_start = ts_start,
ts_end = ts_end
)
fetcher$get_nearest_station(lat = lat_dbl, long = lng_dbl)
weather_data <- fetcher$get_weather_data()

temp_matrix <- matrix(ncol = 2, nrow = n_samples)
for (row in 1:n_samples) {
# find the weather row with a date closest to our current eload time
# this is not the correct way to do this, but good enough for now
# it would seem the nmecr package should do this for us, but it didn't sometimes...
date_diffs <- abs(as.POSIXct(weather_data$date) - ts_df[[row, "time"]])
closest_row <- weather_data[which.min(date_diffs), ]
row_date <- ts_df[[row, "time"]]
row_units <- closest_row$units
if (row_units == "celsius") {
row_temp <- (closest_row$value * 9 / 5) + 32
} else if (row_units == "fahrenheit") {
row_temp <- closest_row$value
} else {
stop(sprintf("Invalid unit type: %s", row_units))
}
temp_matrix[row, ] <- c(row_date, row_temp)
}
temp_df <- data.frame(temp_matrix)
colnames(temp_df) <- c("time", "temp")

# fix data types
temp_df[, "temp"] <- as.double(temp_df[, "temp"])
temp_df[, "time"] <- as.POSIXct.numeric(temp_df[, "time"], origin = lubridate::origin)

ts_data_elem <- xml2::xml_find_first(bsync_doc, '//auc:Scenario[auc:ResourceUses/auc:ResourceUse/auc:EnergyResource/text() = "Electricity"]/auc:TimeSeriesData')
for (row in 1:n_samples) {
ts_data_elem %>%
xml2::xml_add_child("auc:TimeSeries", "ID" = generate_id("TimeSeries")) %>%
xml2::xml_add_child("auc:TimeSeriesReadingQuantity", "Dry Bulb Temperature") %>%
xml2::xml_add_sibling("auc:StartTimestamp", strftime(temp_df[[row, "time"]], "%Y-%m-%dT%H:%M:%S")) %>%
xml2::xml_add_sibling("auc:IntervalFrequency", "Month") %>%
xml2::xml_add_sibling("auc:IntervalReading", temp_df[[row, "temp"]])
}

data_int <- "Monthly"
new_df <- nmecr::create_dataframe(
eload_data = ts_df,
temp_data = temp_df,
start_date = format(ts_start, format = "%m/%d/%y"),
end_date = format(ts_end, format = "%m/%d/%y"),
convert_to_data_interval = data_int,
temp_balancepoint = 65
)
b_df <- bsyncr::bs_parse_nmecr_df(bsync_doc, insert_weather_data = TRUE)
```

Expand All @@ -74,7 +154,8 @@ print(model_df)
intercept <- model$model$coefficients[["(Intercept)"]] / 3.41214 # btu to kwh
# Model is in °C, so convert to F.
slope <- model$model$coefficients[["temp"]] * 9 / 5 # °C to °F

print(slope)
print(intercept)
ggplot2::ggplot(model_df, aes(x = temp, y = value)) +
geom_point(aes(color = variable), data = model_df[model_df$variable == "eload", ]) +
geom_line(aes(color = variable), data = model_df[model_df$variable == "model_fit", ]) +
Expand Down
Loading