diff --git a/.Rbuildignore b/.Rbuildignore
index 3fd7a0d..22136d8 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^LICENSE\.md$
 ^\.github$
 ^README\.Rmd$
+^[\.]?air\.toml$
+^\.vscode$
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000..344f76e
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+  "recommendations": [
+    "Posit.air-vscode"
+  ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..f2d0b79
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+  "[r]": {
+    "editor.formatOnSave": true,
+    "editor.defaultFormatter": "Posit.air-vscode"
+  }
+}
diff --git a/DESCRIPTION b/DESCRIPTION
index c51c6f7..e47f3af 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: jupycost
 Title: Monitor costs and usage of AWS-hosted 2i2c JupyterHubs
-Version: 0.0.0.9000
+Version: 0.1.0
 Authors@R: c(
     person("Andy", "Teucher", , "andy.teucher@gmail.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-7840-692X")),
@@ -12,7 +12,7 @@ Description: Functions to query the AWS Cost Explorer API and Prometheus to
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 URL: https://github.com/Openscapes/jupycost
 BugReports: https://github.com/Openscapes/jupycost/issues
 Depends:
diff --git a/NAMESPACE b/NAMESPACE
index ba93175..542f5df 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,16 +1,26 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(format_prom_result,default)
+S3method(format_prom_result,prom_instant)
+S3method(format_prom_result,prom_range)
 export(aws_ce_palette)
 export(ce_categories)
 export(ce_to_df)
-export(create_range_df)
+export(dir_sizes)
+export(format_prom_result)
 export(get_daily_usage_costs)
 export(get_daily_users)
 export(get_hourly_users)
 export(get_prometheus_labels)
 export(get_prometheus_metrics)
+export(query_hub_names)
 export(query_prometheus_instant)
 export(query_prometheus_range)
 export(unsanitize_dir_names)
+export(user_cpu_requests)
+export(user_cpu_usage)
+export(user_dir_snapshot)
+export(user_mem_requests)
+export(user_mem_usage)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..c413a82
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,21 @@
+# jupycost 0.1.0
+
+## New Features
+
+* Added `user_dir_snapshot()` function for retrieving user directory information at a point in time
+
+* Added `dir_sizes()` function to query home directory sizes over time
+
+* Added functions to track user CPU and memory requests and usage
+
+* Added ability to query hub names and filter hubs by tags
+
+* Improved Prometheus query handling with S3 classes and methods for range and instant queries
+
+* Removed `create_range_df()`; use `format_prom_result()` instead to format results from the various Prometheus query functions
+
+## Minor Improvements
+
+* Updated NASA hub names
+
+* Improved documentation
diff --git a/R/cost-explorer.R b/R/cost-explorer.R
index 138bc04..4c6e23b 100644
--- a/R/cost-explorer.R
+++ b/R/cost-explorer.R
@@ -118,6 +118,7 @@ ce_categories <- function(df, n_categories = 10, cost_col) {
   )
 }
 
+
 #' AWS Cost Explorer palette
 #'
 #' @param n number of categories
@@ -142,3 +143,94 @@ aws_ce_palette <- function(n) {
 
   rev(pal[seq(1, n)])
 }
+
+ce_service_map <- function() {
+  c(
+    "AWS Backup" = "backup",
+    "EC2 - Other" = "compute",
+    "Amazon Elastic Compute Cloud - Compute" = "compute",
+    "Amazon Elastic Container Service for Kubernetes"
= "fixed", + "Amazon Elastic File System" = "home storage", + "Amazon Elastic Load Balancing" = "networking", + "Amazon Simple Storage Service" = "object storage", + "Amazon Virtual Private Cloud" = "networking" + ) +} + +ce_filter_attributable_costs <- function( + cluster = c("openscapeshub", "nmfs-openscapes") +) { + list( + # ref: https://github.com/2i2c-org/infrastructure/issues/4787#issue-2519110356 + # https://github.com/2i2c-org/infrastructure/blob/4c8fa0c264c592a50db2109a3cb9e7e540784af2/helm-charts/aws-ce-grafana-backend/mounted-files/const.py#L52 + Or = list( + list( + Tags = list( + Key = "alpha.eksctl.io/cluster-name", + Values = list(cluster), + MatchOptions = list("EQUALS") + ) + ), + list( + Tags = list( + Key = paste0("kubernetes.io/cluster/", cluster), + Values = list("owned"), + MatchOptions = list("EQUALS") + ) + ), + list( + Tags = list( + Key = "2i2c.org/cluster-name", + Values = list(cluster), + MatchOptions = list("EQUALS") + ) + ), + # FIXME: The inclusion of tags 2i2c:hub-name and 2i2c:node-purpose below + # in this filter is a patch to capture openscapes data from 1st + # July and up to 24th September 2024, and can be removed once + # that date range is considered irrelevant. + list( + Not = list( + Tags = list( + Key = "2i2c:hub-name", + MatchOptions = list("ABSENT") + ) + ) + ), + list( + Not = list( + Tags = list( + Key = "2i2c:node-purpose", + MatchOptions = list("ABSENT") + ) + ) + ) + ) + ) +} + +#' Get the names of the hubs from AWS Cost Explorer +#' +#' @param start_date A date or date-like object that can be coerced to a string. +#' @param end_date A date or date-like object that can be coerced to a string. +#' +#' @returns +#' A character vector of hub names, with missing or empty values replaced by `"support"`. +#' +#' @export +query_hub_names <- function(start_date, end_date) { + aws_ce_client <- sixtyfour::con_ce() + # ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ce/client/get_tags.html + response <- aws_ce_client$get_tags( + TimePeriod = list( + Start = as.character(start_date), + End = as.character(end_date) + ), + TagKey = "2i2c:hub-name" + ) + + hub_names <- response$Tags + hub_names[is.na(hub_names) | hub_names == ""] <- "support" + + hub_names +} diff --git a/R/cost-summaries.R b/R/cost-summaries.R index 7724830..46d467f 100644 --- a/R/cost-summaries.R +++ b/R/cost-summaries.R @@ -3,6 +3,8 @@ #' @param end_date A Date object representing the end of the desired date range. #' @param months_back Optional. A single integer specifying how many months back to query. #' @param cost_type The type of costs. "unblended" (default), "blended", or "all" +#' @param hub which hub (or "all") you want costs for +#' @param cluster which cluster ("openscapeshub" or "nmfs-openscapes") you want information for. Ensure that you are authenticated to the correct AWS account using the appropriate API keys. #' #' @returns #' A data frame of AWS usage costs. 
@@ -11,10 +13,16 @@
 get_daily_usage_costs <- function(
   end_date = Sys.Date(),
   months_back = 6,
-  cost_type = c("unblended", "blended", "all")
+  cost_type = c("unblended", "blended", "all"),
+  hub = c("all", "prod", "staging", "workshop", "support"),
+  cluster = c("openscapeshub", "nmfs-openscapes")
 ) {
   end_date <- check_valid_date(end_date)
 
+  hub <- match.arg(hub)
+
+  cluster <- match.arg(cluster)
+
   if (!rlang::is_integerish(months_back) || months_back > 12) {
     cli::cli_abort("{.arg months_back} must be an integer <= 12.")
   }
@@ -26,21 +34,61 @@
     unit = "month"
   )
 
+  filter_list <- list(
+    And = list(
+      list(
+        Dimensions = list(
+          Key = "RECORD_TYPE",
+          Values = list("Usage")
+        )
+      ),
+      # TODO: figure out why attributable costs aren't being filtered for shared and individual hubs (only for all)
+      # https://github.com/2i2c-org/jupyterhub-cost-monitoring/blob/main/src/jupyterhub_cost_monitoring/query_usage.py
+      ce_filter_attributable_costs(cluster)
+    )
+  )
+
+  if (hub == "support") {
+    filter_list <- list(
+      And = list(
+        filter_list[["And"]][[1]],
+        list(
+          Tags = list(
+            Key = "2i2c:hub-name",
+            MatchOptions = list("ABSENT")
+          )
+        )
+      )
+    )
+  } else if (hub != "all") {
+    filter_list <- list(
+      And = list(
+        filter_list[["And"]][[1]],
+        list(
+          Tags = list(
+            Key = "2i2c:hub-name",
+            Values = list(hub),
+            MatchOptions = list("EQUALS")
+          )
+        )
+      )
+    )
+  }
+
   raw_daily <- sixtyfour::aws_billing(
     as.character(start_date),
     as.character(end_date),
-    filter = list(
-      Dimensions = list(
-        Key = "RECORD_TYPE",
-        Values = "Usage"
-      )
-    )
+    filter = filter_list
   )
 
   if (cost_type != "all") {
     raw_daily <- dplyr::filter(raw_daily, .data$id == cost_type)
   }
 
-  raw_daily |>
+  daily_ce <- raw_daily |>
     dplyr::mutate(date = lubridate::ymd(.data$date))
+
+  daily_ce$service_component <- ce_service_map()[daily_ce$service]
+  daily_ce$service_component[is.na(daily_ce$service_component)] <- "other"
+  daily_ce
 }
diff --git a/R/prometheus-usage-summaries.R b/R/prometheus-usage-summaries.R
index 2df75ba..f9f17d1 100644
--- a/R/prometheus-usage-summaries.R
+++ b/R/prometheus-usage-summaries.R
@@ -1,9 +1,9 @@
-#' Get daily users
+#' Get daily user counts
 #'
 #' @inheritParams query_prometheus_range
-#' @param step Time step in days (default `1`).
 #' @param aggregation time period over which to aggregate, in days (integer,
 #'   default `1`).
+#' @param step Time step in days (default `1`).
 #' @inheritParams query_prometheus_range
 #'
 #' @returns
@@ -39,7 +39,7 @@ get_daily_users <- function(
     step = glue::glue(step * 24, "h0m0s")
   )
 
-  create_range_df(res, "n_users") |>
+  format_prom_result(res, "n_users") |>
     dplyr::mutate(date = as.Date(date)) |>
     # Fill in zeros for missing dates
     tidyr::complete(
@@ -52,8 +52,6 @@
 #' Get hourly user counts
 #'
 #' @inheritParams query_prometheus_range
-#' @param step Time step in seconds, or a string formatted as `"*h*m*s"`
-#'   Eg., Default 1 hour: `"1h0m0s"`.
 #'
 #' @returns
 #' A dataframe of hourly user counts, grouped by namespace.
@@ -78,7 +76,7 @@ get_hourly_users <- function(
     end_time = end_time,
     step = step
   )
-  create_range_df(res, "n_users") |>
+  format_prom_result(res, "n_users") |>
     dplyr::rename(date_time = date) |>
     # Fill in zeros for missing dates
     tidyr::complete(
@@ -87,3 +85,337 @@
     fill = list(n_users = 0)
   )
 }
+
+#' Get user directory information
+#'
+#' @inheritParams query_prometheus_instant
+#'
+#' @returns
+#' A data frame of directory information:
+#' - `namespace`: Hub Namespace (prod, staging, workshop)
+#' - `directory`: User directory
+#' - `last_accessed`: Date of last access
+#' - `dirsize_mb`: Size of directory in MB
+#' - `n_files`: Number of files
+#' - `percent_total_size`: Percentage of total directory size
+#'
+#' @export
+user_dir_snapshot <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  time = Sys.time()
+) {
+  last_accessed <- query_prometheus_instant(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = "min(dirsize_latest_mtime) by (namespace, directory)",
+    time = time
+  ) |>
+    format_prom_result(
+      value_name = "last_accessed",
+      value_fn = prom_date
+    )
+
+  size <- query_prometheus_instant(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = "max(dirsize_total_size_bytes) by (namespace, directory)",
+    time = time
+  ) |>
+    format_prom_result(
+      value_name = "dirsize_mb",
+      value_fn = \(x) as.numeric(x) * 1e-6
+    )
+
+  n_files <- query_prometheus_instant(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = "max(dirsize_entries_count) by (namespace, directory)",
+    time = time
+  ) |>
+    format_prom_result(
+      value_name = "n_files"
+    )
+
+  join_cols <- c("namespace", "directory", "date")
+
+  last_accessed |>
+    dplyr::left_join(size, by = join_cols) |>
+    dplyr::left_join(n_files, by = join_cols) |>
+    dplyr::mutate(
+      directory = unsanitize_dir_names(.data$directory),
+      percent_total_size = .data$dirsize_mb / sum(.data$dirsize_mb) * 100,
+      .by = "namespace"
+    ) |>
+    dplyr::select(
+      "date",
+      "namespace",
+      "directory",
+      "last_accessed",
+      "n_files",
+      "dirsize_mb",
+      "percent_total_size"
+    )
+}
+
+#' Query directory sizes over time from Grafana
+#'
+#' @param by_user A logical value indicating whether to group by user (directory). Default `FALSE`.
+#' @inheritParams query_prometheus_range
+#'
+#' @returns
+#' A data frame of directory sizes over time, with directory sizes in megabytes.
+#'
+#' @export
+dir_sizes <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  by_user = FALSE,
+  step = "1h0m0s"
+) {
+  # dirsize_total_size_bytes is a metric calculated at the root user directory
+  # level (it doesn't calculate for subdirectories;
+  # https://github.com/yuvipanda/prometheus-dirsize-exporter/tree/main?tab=readme-ov-file#metrics-recorded),
+  # so if grouping by directory or selecting a single user, sum() will be equal
+  # to max(). But if we are not grouping by directory and want the total size
+  # of all user directories, we need to sum().
+  if (by_user) {
+    query <- 'max(dirsize_total_size_bytes) by (namespace, directory)'
+  } else {
+    query <- 'sum(dirsize_total_size_bytes) by (namespace)'
+  }
+
+  ret <- query_prometheus_range(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = query,
+    start_time = start_time,
+    end_time = end_time,
+    step = step
+  ) |>
+    format_prom_result(
+      value_name = "dirsize_mb",
+      value_fn = \(x) as.numeric(x) * 1e-6
+    )
+
+  if (by_user) {
+    ret <- ret |>
+      dplyr::mutate(
+        directory = unsanitize_dir_names(.data$directory)
+      )
+  }
+
+  ret
+}
+
+
+#' Query user memory requests from Grafana
+#'
+#' @description
+#' Query user memory requests from Grafana. This gives the memory requests
+#' by a user, in addition to the instance type and container image they are using,
+#' by the specified time step for a given time range.
+#'
+#' @inheritParams query_prometheus_range
+#'
+#' @returns
+#' User memory request data.
+#'
+#' @export
+user_mem_requests <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+) {
+  ret <- query_prometheus_range(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = resource_requests_query("memory"),
+    start_time = start_time,
+    end_time = end_time,
+    step = step
+  ) |>
+    format_prom_result(
+      value_name = "mem_mb",
+      value_fn = \(x) as.numeric(x) * 1e-6
+    )
+
+  ret
+}
+
+#' Query user CPU requests from Grafana
+#'
+#' @description
+#' Query user CPU requests from Grafana. This gives the CPU requests
+#' by a user, in addition to the instance type and container image they are using,
+#' by the specified time step for a given time range.
+#'
+#' @inheritParams query_prometheus_range
+#'
+#' @returns
+#' User CPU request data.
+#'
+#' @export
+user_cpu_requests <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+) {
+  ret <- query_prometheus_range(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = resource_requests_query("cpu"),
+    start_time = start_time,
+    end_time = end_time,
+    step = step
+  ) |>
+    format_prom_result(
+      value_name = "cpu_cores",
+      value_fn = \(x) as.numeric(x)
+    )
+
+  ret
+}
+
+resource_requests_query <- function(resource) {
+  glue::glue(
+    'sum(
+  kube_pod_container_resource_requests{resource="<resource>", pod=~"jupyter-.*"}
+  * on(node) group_left(label_beta_kubernetes_io_instance_type)
+  kube_node_labels
+) by (namespace, pod, label_beta_kubernetes_io_instance_type, node)
+* on(namespace, pod) group_left(image_id)
+kube_pod_container_info{namespace=~".*", pod=~"jupyter-.*"}',
+    .open = "<",
+    .close = ">"
+  )
+}
+
+# Resource allocation is set here:
+# https://github.com/2i2c-org/infrastructure/blob/bf1225f89162e525f58caa537b6181c27d9c941e/config/clusters/openscapes/common.values.yaml#L106-L172.
+# AFAICT the mem_limit and mem_guarantee essentially dictate how many pods can
+# fit in a node. cpu_limit is the upper bound on the CPU resources a user will
+# get, depending on how many pods are running on a node, and how CPU intensive
+# the workloads are.
+# If you choose a Resource Allocation that has the highest memory for the CPU,
+# then you will get a node to yourself...
+
+#' Query user memory usage from Grafana
+#'
+#' @description
+#' Query user memory usage from Grafana. This gives the actual memory usage (in MB)
+#' by a user, in addition to the user pod and hub namespace,
+#' by the specified time step for a given time range.
+#'
+#' @inheritParams query_prometheus_range
+#'
+#' @returns
+#' A data frame containing pod memory usage.
+#'
+#' @export
+user_mem_usage <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+) {
+  ret <- query_prometheus_range(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = 'sum(
+  # exclude name="" because the same container can be reported
+  # with both no name and `name=k8s_...`,
+  # in which case sum() by (pod) reports double the actual metric
+  container_memory_working_set_bytes{name!="", instance=~".*"}
+  * on (namespace, pod) group_left(container)
+  group(
+    kube_pod_labels{label_app="jupyterhub", label_component="singleuser-server", namespace=~".*", pod=~".*"}
+  ) by (pod, namespace)
+) by (pod, namespace)',
+    start_time = start_time,
+    end_time = end_time,
+    step = step
+  )
+
+  res <- ret |>
+    format_prom_result(
+      value_name = "mem_mb",
+      value_fn = \(x) as.numeric(x) * 1e-6
+    )
+
+  res
+}
+
+#' Query user CPU usage from Grafana
+#'
+#' @description
+#' Query user CPU usage from Grafana. This gives the actual CPU usage (as a percentage)
+#' by a user, in addition to the user pod and hub namespace,
+#' by the specified time step for a given time range.
+#'
+#' @inheritParams query_prometheus_range
+#'
+#' @returns
+#' A data frame containing user CPU usage.
+#'
+#' @export
+user_cpu_usage <- function(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+) {
+  ret <- query_prometheus_range(
+    grafana_url = grafana_url,
+    grafana_token = grafana_token,
+    query = 'sum(
+  # exclude name="" because the same container can be reported
+  # with both no name and `name=k8s_...`,
+  # in which case sum() by (pod) reports double the actual metric
+  irate(container_cpu_usage_seconds_total{name!="", instance=~".*"}[5m])
+  * on (namespace, pod) group_left(container)
+  group(
+    kube_pod_labels{label_app="jupyterhub", label_component="singleuser-server", namespace=~".*", pod=~".*"}
+  ) by (pod, namespace)
+) by (pod, namespace)',
+    start_time = start_time,
+    end_time = end_time,
+    step = step
+  )
+
+  res <- ret |>
+    format_prom_result(
+      value_name = "cpu_percent",
+      # irate() yields cores (CPU-seconds per second); multiply by 100 to
+      # express as a percentage of one core
+      value_fn = \(x) as.numeric(x) * 100
+    )
+
+  res
+}
+
+
+resource_usage_query <- function(resource) {
+  sum_line <- switch(
+    resource,
+    "cpu" = 'irate(container_cpu_usage_seconds_total{name!="", instance=~".*", pod!="jupyter-deployment-service-check",pod=~"jupyter-.*"}[5m])',
+    "memory" = 'container_memory_working_set_bytes{name!="", instance=~".*", pod!="jupyter-deployment-service-check",pod=~"jupyter-.*"}' # "container_memory_usage_bytes" includes cache which may be misleading
+  )
+
+  paste0(
+    'sum(
+  # exclude name="" because the same container can be reported
+  # with both no name and `name=k8s_...`,
+  # in which case sum() by (pod) reports double the actual metric
+  # TODO: Not irate for memory!
+',
+    sum_line,
+    '
+* on (namespace, pod) group_left(annotation_hub_jupyter_org_username)
+group(
+  kube_pod_annotations{namespace=~".*", annotation_hub_jupyter_org_username=~".*"}
+) by (pod, namespace, annotation_hub_jupyter_org_username)
+) by (annotation_hub_jupyter_org_username, namespace)'
+  )
+}
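A quick usage sketch of the new resource functions (not part of the diff): it assumes `GRAFANA_TOKEN` is set for the target hub and that, per the queries above, both results share `namespace`, `pod`, and `date` columns alongside their `mem_mb` value columns.

```r
library(dplyr)
library(jupycost)

window_start <- as.POSIXct("2025-01-01 00:00:00", tz = "UTC")
window_end <- as.POSIXct("2025-01-10 00:00:00", tz = "UTC")

# Requested vs. actual memory per user pod, at the default 10-minute step
requested <- user_mem_requests(start_time = window_start, end_time = window_end)
used <- user_mem_usage(start_time = window_start, end_time = window_end)

# Both value columns are named mem_mb, so suffix them at the join and
# compute how much of each request is actually being used
mem <- inner_join(
  used,
  requested,
  by = c("namespace", "pod", "date"),
  suffix = c("_used", "_requested")
) |>
  mutate(pct_of_request = mem_mb_used / mem_mb_requested * 100)
```

Because both queries run over the same range and step, the timestamps should line up row for row; if the steps differ, the join would silently drop non-matching rows.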
diff --git a/R/prometheus.R b/R/prometheus.R
index 30953f6..d00b0e5 100644
--- a/R/prometheus.R
+++ b/R/prometheus.R
@@ -49,7 +49,7 @@ get_prometheus_labels <- function(
   grafana_token = Sys.getenv("GRAFANA_TOKEN")
 ) {
   prometheus_uid <- get_default_prometheus_uid(grafana_url, grafana_token)
-  httr2::request(grafana_url) |>
+  resp <- httr2::request(grafana_url) |>
     httr2::req_url_path(
       "/api/datasources/proxy/uid",
       prometheus_uid,
@@ -59,6 +59,8 @@
     httr2::req_perform() |>
     httr2::resp_check_status() |>
     httr2::resp_body_json(simplifyVector = TRUE, simplifyDataFrame = TRUE)
+
+  resp$data
 }
 
 #' Get a data.frame of metrics available from Prometheus
@@ -97,6 +99,10 @@ get_prometheus_metrics <- function(
 #' Query Prometheus for an instant in time
 #'
 #' @inheritParams query_prometheus_range
+#' @param time Date or date-time object, or character of the form
+#'   "YYYY-MM-DD HH:MM:SS". Time components are optional. Default is current
+#'   time (`Sys.time()`). If a `POSIXt` object, it will be converted
+#'   to UTC; if a `Date` or `character` object, it will be assumed to be UTC.
 #'
 #' @return List containing the response from Prometheus, in the
 #'   [instant vector format](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-vectors)
@@ -110,10 +116,11 @@
 query_prometheus_instant <- function(
   grafana_url = "https://grafana.openscapes.2i2c.cloud",
   grafana_token = Sys.getenv("GRAFANA_TOKEN"),
-  query
+  query,
+  time = Sys.time()
 ) {
   prometheus_uid <- get_default_prometheus_uid(grafana_url, grafana_token)
-  httr2::request(grafana_url) |>
+  resp <- httr2::request(grafana_url) |>
     httr2::req_url_path(
       "/api/datasources/proxy/uid",
       prometheus_uid,
@@ -121,12 +128,13 @@
     ) |>
     httr2::req_options(http_version = 2) |>
     httr2::req_auth_bearer_token(grafana_token) |>
-    httr2::req_url_query(
-      query = query
-    ) |>
+    httr2::req_url_query(query = query, time = time_string(time)) |>
     httr2::req_perform() |>
-    httr2::resp_check_status() |>
-    httr2::resp_body_json(simplifyVector = TRUE)
+    httr2::resp_check_status()
+
+  ret <- httr2::resp_body_json(resp, simplifyVector = TRUE)
+
+  as.prom_instant(ret)
 }
 
 #' Query prometheus for a range of dates
@@ -141,12 +149,14 @@
 #'   ([Prometheus Query Language](https://prometheus.io/docs/prometheus/latest/querying/basics/))
 #' @param start_time Start of time range to query. Date or date-time object, or
 #'   character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-#'   Default is `end_time` - 30 days.
+#'   Default is `end_time` - 30 days. If a `POSIXt` object, it will be converted
+#'   to UTC; if a `Date` or `character` object, it will be assumed to be UTC.
 #' @param end_time End of time range to query. Date or date-time object, or
 #'   character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-#'   Default is today (`Sys.Date()`)
+#'   Default is today (`Sys.Date()`). If a `POSIXt` object, it will be converted
+#'   to UTC; if a `Date` or `character` object, it will be assumed to be UTC.
 #' @param step Time step in seconds, or a string formatted as `"*h*m*s"` Eg., 1
-#'   day would be `"24h0m0s"`.
+#'   day would be `"24h0m0s"`. Default is 1 hour (`"1h0m0s"`).
 #'
 #' @return List containing the response from Prometheus, in the
 #'   [range vector format](https://prometheus.io/docs/prometheus/latest/querying/api/#range-vectors)
@@ -165,7 +175,7 @@
 query_prometheus_range <- function(
   query,
   start_time = end_time - 30,
   end_time = Sys.Date(),
-  step
+  step = "1h0m0s"
 ) {
   prometheus_uid <- get_default_prometheus_uid(grafana_url, grafana_token)
   req <- httr2::request(grafana_url) |>
@@ -186,8 +196,8 @@
     httr2::req_auth_bearer_token(grafana_token) |>
     httr2::req_url_query(
       query = query,
-      start = format(as.POSIXct(start_time, tz = "UTC"), "%Y-%m-%dT%H:%M:%SZ"),
-      end = format(as.POSIXct(end_time, tz = "UTC"), "%Y-%m-%dT%H:%M:%SZ"),
+      start = time_string(start_time),
+      end = time_string(end_time),
       step = step
     )
 
@@ -195,14 +205,15 @@
     httr2::req_perform() |>
     httr2::resp_check_status()
 
-  resp |>
-    httr2::resp_body_json(simplifyVector = TRUE, simplifyDataFrame = TRUE)
+  ret <- httr2::resp_body_json(resp, simplifyVector = TRUE)
+  as.prom_range(ret)
 }
 
 #' Create a data frame from a prometheus range query result
 #'
-#' @param res A list containing data results; the result of running `query_prometheus_range()`
+#' @param x A `prom_result` object; the result of running [query_prometheus_range()] or [query_prometheus_instant()]
 #' @param value_name A single string specifying the name for the value column.
+#' @param value_fn A function to transform the value. Default [as.numeric()].
 #'
 #' @returns
 #' A data frame with columns for metrics, a UTC datetime column named 'date',
@@ -216,11 +227,29 @@
 #'   end_time = "2024-05-28",
 #'   step = 60 * 60 * 24
 #' )
-#' create_range_df(range_res, "size (bytes)")
+#' format_prom_result(range_res, "size (bytes)")
 #' @export
-create_range_df <- function(res, value_name) {
-  metrics <- as.data.frame(res$data$result$metric)
-  vals <- res$data$result$values
+format_prom_result <- function(x, value_name, value_fn = base::as.numeric) {
+  UseMethod("format_prom_result")
+}
+
+#' @export
+format_prom_result.default <- function(
+  x,
+  value_name,
+  value_fn = base::as.numeric
+) {
+  cli::cli_abort("Unsupported type for {.fun format_prom_result}")
+}
+
+#' @export
+format_prom_result.prom_range <- function(
+  x,
+  value_name,
+  value_fn = base::as.numeric
+) {
+  metrics <- as.data.frame(x$data$result$metric)
+  vals <- x$data$result$values
 
   out_df <- lapply(seq_along(vals), \(x) {
     vals <- as.data.frame(vals[[x]])
@@ -229,12 +258,48 @@
     purrr::list_rbind()
 
   out_df |>
+    format_prom_df(value_name = value_name, value_fn = value_fn)
+}
+
+#' @export
+format_prom_result.prom_instant <- function(
+  x,
+  value_name,
+  value_fn = base::as.numeric
+) {
+  metrics <- as.data.frame(x$data$result$metric)
+  vals <- as.data.frame(do.call(rbind, x$data$result$value))
+
+  cbind(metrics, vals) |>
+    format_prom_df(value_name = value_name, value_fn = value_fn)
+}
+
+format_prom_df <- function(x, value_name, value_fn = base::as.numeric) {
+  if (!"V1" %in% names(x)) {
+    cli::cli_abort("Missing date column")
+  }
+
+  if (!"V2" %in% names(x)) {
+    cli::cli_abort("Missing value column")
+  }
+
+  x <- x |>
     dplyr::rename(
       date = "V1",
       "{value_name}" := "V2"
-    ) |>
-    dplyr::mutate(
-      date = as.POSIXct(as.numeric(date), origin = "1970-01-01", tz = "UTC"),
-      "{value_name}" := as.numeric(.data[[value_name]])
     )
+
+  dplyr::mutate(
+    x,
+    date = prom_date(date),
+    "{value_name}" := value_fn(.data[[value_name]])
+  )
+} + +as.prom_range <- function(x) { + structure(x, class = c("prom_range", "prom_result")) +} + +as.prom_instant <- function(x) { + structure(x, class = c("prom_instant", "prom_result")) } diff --git a/R/utils.R b/R/utils.R index cad2c6b..c8d8e98 100644 --- a/R/utils.R +++ b/R/utils.R @@ -96,3 +96,39 @@ set_env_vars <- function(org = c("nasa", "nmfs"), env = parent.frame()) { stats::setNames(were_vars_set & !empty_vars, names(env_vars)) } + + +#' Convert Prometheus numeric date to POSIXct +#' +#' @param x A numeric vector of unix timestamps. +#' +#' @returns +#' A POSIXct vector in UTC timezone. +#' +#' @noRd +prom_date <- function(x) { + as.POSIXct(as.numeric(x), origin = "1970-01-01", tz = "UTC") +} + +#' Format time as string in the format that prometheus expects +#' +#' @param x A Date, POSIXt time object or character string in UTC +#' +#' @returns +#' A string in ISO 8601 format with UTC timezone (e.g. "2024-01-01T12:00:00Z"). +#' +#' @noRd +time_string <- function( + x, + arg = rlang::caller_arg(x), + call = rlang::caller_env() +) { + if (!inherits(x, c("POSIXt", "character", "Date")) || length(x) != 1) { + cli::cli_abort( + "{.arg {arg}} must be a length 1 Date or POSIXt object, or character in a standard unambiguous date format", + arg = arg, + call = call + ) + } + format(as.POSIXct(x, tz = "UTC"), "%Y-%m-%dT%H:%M:%SZ") +} diff --git a/air.toml b/air.toml new file mode 100644 index 0000000..e69de29 diff --git a/inst/pricing.R b/inst/pricing.R new file mode 100644 index 0000000..81789c2 --- /dev/null +++ b/inst/pricing.R @@ -0,0 +1,46 @@ +library(readr) +library(dplyr) +library(paws) +library(duckplyr) + +svc <- pricing() +# Retrieves the service for the given Service Code. +svc$describe_services( + FormatVersion = "aws_v1", + MaxResults = 10L, + ServiceCode = "AmazonEC2" +) + +price_lists <- svc$list_price_lists( + ServiceCode = "AmazonEC2", + EffectiveDate = as.POSIXct("2025-03-17"), + CurrencyCode = "USD" +) + +arn <- Filter( + \(x) x$RegionCode == "us-west-2", + price_lists$PriceLists +)[[1]]$PriceListArn + +url <- svc$get_price_list_file_url( + PriceListArn = arn, + FileFormat = "csv" +)$Url + +download.file(url[[1]], "inst/ec2-prices.csv") + +db_exec("INSTALL httpfs") +db_exec("LOAD httpfs") + +ret <- read_csv_duckdb(url) |> + filter( + `Instance Type` == "r5.xlarge", + TermType == "OnDemand", + `Operating System` == "Linux", + CapacityStatus == "Used", + (`Pre Installed S/W` == "NA" | is.na(`Pre Installed S/W`)) + ) |> + collect() + +# Standard Kubernetes version support $0.10 per cluster per hour +# What does EBS cost? 
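The `prom_range` / `prom_instant` classes above are plain `structure()` wrappers, so the S3 dispatch of `format_prom_result()` can be exercised offline. A minimal sketch, mirroring the instant-vector response shape used in the mocked tests later in this diff (field names follow the Prometheus HTTP API):

```r
library(jupycost)

# Hand-built instant-vector response: one series with a (timestamp, value) pair
fake <- structure(
  list(
    data = list(
      result = list(
        metric = data.frame(namespace = "prod", directory = "user-2ddir"),
        value = list(c("1704067200", "2000000"))
      )
    )
  ),
  class = c("prom_instant", "prom_result")
)

# Dispatches to format_prom_result.prom_instant(): the unix timestamp in V1
# becomes a UTC POSIXct `date` column and V2 is transformed by `value_fn`
format_prom_result(
  fake,
  value_name = "dirsize_mb",
  value_fn = \(x) as.numeric(x) * 1e-6
)
#> Expect one row: namespace "prod", directory "user-2ddir",
#> date 2024-01-01 00:00:00 UTC, dirsize_mb 2
```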
diff --git a/inst/scratch-users-over-time.R b/inst/scratch-users-over-time.R
index 164391f..486ab15 100644
--- a/inst/scratch-users-over-time.R
+++ b/inst/scratch-users-over-time.R
@@ -13,12 +13,14 @@ dir_sizes <- query_prometheus_range(
   end_time = end_date,
   step = 60 * 60 * 24
 ) |>
-  create_range_df(value_name = "size") |>
+  format_prom_result(value_name = "size") |>
   mutate(
     directory = unsanitize_dir_names(directory),
     size = size * 1e-9
   ) |>
-  filter(!directory %in% c(".ipynb_checkpoints", "_shared")) |>
+  filter(
+    !directory %in% c(".ipynb_checkpoints", "_shared")
+  ) |>
   filter(namespace != "staging")
 
 n_users_over_time <- dir_sizes |>
@@ -44,4 +46,17 @@ ever <- dir_sizes |>
 ggplot(n_users_over_time, aes(x = date, y = n_users)) +
   geom_line()
 
-daily_users <- get_daily_users(start_time = start_date, end_time = end_date)
+daily_users <- get_daily_users(
+  start_time = start_date,
+  end_time = end_date
+)
+
+user_mem_requests(
+  start_time = as.POSIXct("2025-01-01 00:00:00"),
+  end_time = as.POSIXct("2025-01-10 00:00:00")
+)
+
+user_cpu_requests(
+  start_time = as.POSIXct("2025-01-01 00:00:00"),
+  end_time = as.POSIXct("2025-01-10 00:00:00")
+)
diff --git a/inst/scratch.R b/inst/scratch.R
deleted file mode 100644
index 5985d95..0000000
--- a/inst/scratch.R
+++ /dev/null
@@ -1,19 +0,0 @@
-# 'sum(
-#   kube_pod_container_resource_requests{resource="memory", namespace=~"$hub", node=~"$instance"}
-# ) by (pod, namespace)'
-
-Sys.setenv(
-  "GRAFANA_TOKEN" = Sys.getenv("NASA_GRAFANA_TOKEN"),
-  "AWS_ACCESS_KEY_ID" = Sys.getenv("NASA_AWS_ACCESS_KEY_ID"),
-  "AWS_SECRET_ACCESS_KEY" = Sys.getenv("NASA_AWS_SECRET_ACCESS_KEY"),
-  "AWS_REGION" = "us-east-1"
-)
-
-query_prometheus_range(
-  query = 'sum(
-  kube_pod_container_resource_requests{resource="cpu", namespace="prod", instance=".*"}
-) by (pod, namespace)',
-  start_time = "2024-01-01",
-  end_time = "2024-01-30",
-  step = 1
-)
diff --git a/jupycost.Rproj b/jupycost.Rproj
index aaa62a5..f1487cd 100644
--- a/jupycost.Rproj
+++ b/jupycost.Rproj
@@ -1,12 +1,18 @@
 Version: 1.0
+ProjectId: ada89266-513e-4cc9-852a-a0006199802f
 
 RestoreWorkspace: No
 SaveWorkspace: No
 AlwaysSaveHistory: Default
 
 EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
 Encoding: UTF-8
 
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes
 LineEndingConversion: Posix
diff --git a/man/dir_sizes.Rd b/man/dir_sizes.Rd
new file mode 100644
index 0000000..13a792c
--- /dev/null
+++ b/man/dir_sizes.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{dir_sizes}
+\alias{dir_sizes}
+\title{Query directory sizes over time from Grafana}
+\usage{
+dir_sizes(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  by_user = FALSE,
+  step = "1h0m0s"
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{start_time}{Start of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{end_time}{End of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{by_user}{A logical value indicating whether to group by user (directory). Default \code{FALSE}.}
+
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
+}
+\value{
+A data frame of directory sizes over time, with directory sizes in megabytes.
+}
+\description{
+Query directory sizes over time from Grafana
+}
diff --git a/man/create_range_df.Rd b/man/format_prom_result.Rd
similarity index 58%
rename from man/create_range_df.Rd
rename to man/format_prom_result.Rd
index 5a65ef8..7e4fab7 100644
--- a/man/create_range_df.Rd
+++ b/man/format_prom_result.Rd
@@ -1,15 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/prometheus.R
-\name{create_range_df}
-\alias{create_range_df}
+\name{format_prom_result}
+\alias{format_prom_result}
 \title{Create a data frame from a prometheus range query result}
 \usage{
-create_range_df(res, value_name)
+format_prom_result(x, value_name, value_fn = base::as.numeric)
 }
 \arguments{
-\item{res}{A list containing data results; the result of running \code{query_prometheus_range()}}
+\item{x}{A \code{prom_result} object; the result of running \code{\link[=query_prometheus_range]{query_prometheus_range()}} or \code{\link[=query_prometheus_instant]{query_prometheus_instant()}}}
 
 \item{value_name}{A single string specifying the name for the value column.}
+
+\item{value_fn}{A function to transform the value. Default \code{\link[=as.numeric]{as.numeric()}}.}
 }
 \value{
 A data frame with columns for metrics, a UTC datetime column named 'date',
@@ -19,7 +21,7 @@
 and a numeric value column named according to \code{value_name}.
 }
 \description{
 Create a data frame from a prometheus range query result
 }
 \examples{
-\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (FALSE) withAutoprint(\{ # examplesIf}
 
 range_res <- query_prometheus_range(
   query = "max(dirsize_total_size_bytes) by (directory, namespace)",
   start_time = "2024-05-01",
   end_time = "2024-05-28",
   step = 60 * 60 * 24
 )
-create_range_df(range_res, "size (bytes)")
+format_prom_result(range_res, "size (bytes)")
\dontshow{\}) # examplesIf}
 }
diff --git a/man/get_daily_usage_costs.Rd b/man/get_daily_usage_costs.Rd
index 92c915b..edb5a86 100644
--- a/man/get_daily_usage_costs.Rd
+++ b/man/get_daily_usage_costs.Rd
@@ -7,7 +7,9 @@
 get_daily_usage_costs(
   end_date = Sys.Date(),
   months_back = 6,
-  cost_type = c("unblended", "blended", "all")
+  cost_type = c("unblended", "blended", "all"),
+  hub = c("all", "prod", "staging", "workshop", "support"),
+  cluster = c("openscapeshub", "nmfs-openscapes")
 )
 }
 \arguments{
@@ -16,6 +18,10 @@
 \item{months_back}{Optional. A single integer specifying how many months back to query.}
 
 \item{cost_type}{The type of costs. "unblended" (default), "blended", or "all"}
+
+\item{hub}{which hub (or "all") you want costs for}
+
+\item{cluster}{which cluster ("openscapeshub" or "nmfs-openscapes") you want information for. Ensure that you are authenticated to the correct AWS account using the appropriate API keys.}
 }
 \value{
 A data frame of AWS usage costs.
diff --git a/man/get_daily_users.Rd b/man/get_daily_users.Rd
index 807b90f..310756f 100644
--- a/man/get_daily_users.Rd
+++ b/man/get_daily_users.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/prometheus-usage-summaries.R
 \name{get_daily_users}
 \alias{get_daily_users}
-\title{Get daily users}
+\title{Get daily user counts}
 \usage{
 get_daily_users(
   grafana_url = "https://grafana.openscapes.2i2c.cloud",
@@ -22,11 +22,13 @@
 the environment variable \code{GRAFANA_TOKEN}}
 
 \item{start_time}{Start of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is \code{end_time} - 30 days.}
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
 \item{end_time}{End of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is today (\code{Sys.Date()})}
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
 \item{aggregation}{time period over which to aggregate, in days (integer,
 default \code{1}).}
@@ -37,5 +39,5 @@
 A data frame with daily user counts, grouped by namespace.
 }
 \description{
-Get daily users
+Get daily user counts
 }
diff --git a/man/get_hourly_users.Rd b/man/get_hourly_users.Rd
index 2e3f26e..16da5ff 100644
--- a/man/get_hourly_users.Rd
+++ b/man/get_hourly_users.Rd
@@ -21,14 +21,16 @@
 the environment variable \code{GRAFANA_TOKEN}}
 
 \item{start_time}{Start of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is \code{end_time} - 30 days.}
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
 \item{end_time}{End of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is today (\code{Sys.Date()})}
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
-\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"}
-Eg., Default 1 hour: \code{"1h0m0s"}.}
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
 }
 \value{
 A dataframe of hourly user counts, grouped by namespace.
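Before the man pages for the new cost functions, a usage sketch (not part of the diff) of how they compose; it assumes AWS credentials for the target account are configured in the environment, as the sixtyfour-backed calls require:

```r
library(jupycost)

# Hub names seen by Cost Explorer over January 2025; untagged costs
# come back labelled "support"
hubs <- query_hub_names("2025-01-01", "2025-02-01")

# Daily usage costs for the prod hub on the openscapeshub cluster
prod_costs <- get_daily_usage_costs(
  end_date = as.Date("2025-02-01"),
  months_back = 1,
  hub = "prod",
  cluster = "openscapeshub"
)

# service_component comes from ce_service_map(); unmapped services are "other"
aggregate(cost ~ service_component, data = prod_costs, FUN = sum)
```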
diff --git a/man/query_hub_names.Rd b/man/query_hub_names.Rd
new file mode 100644
index 0000000..34952d9
--- /dev/null
+++ b/man/query_hub_names.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cost-explorer.R
+\name{query_hub_names}
+\alias{query_hub_names}
+\title{Get the names of the hubs from AWS Cost Explorer}
+\usage{
+query_hub_names(start_date, end_date)
+}
+\arguments{
+\item{start_date}{A date or date-like object that can be coerced to a string.}
+
+\item{end_date}{A date or date-like object that can be coerced to a string.}
+}
+\value{
+A character vector of hub names, with missing or empty values replaced by \code{"support"}.
+}
+\description{
+Get the names of the hubs from AWS Cost Explorer
+}
diff --git a/man/query_prometheus_instant.Rd b/man/query_prometheus_instant.Rd
index 0a7a813..9c3deac 100644
--- a/man/query_prometheus_instant.Rd
+++ b/man/query_prometheus_instant.Rd
@@ -7,7 +7,8 @@
 query_prometheus_instant(
   grafana_url = "https://grafana.openscapes.2i2c.cloud",
   grafana_token = Sys.getenv("GRAFANA_TOKEN"),
-  query
+  query,
+  time = Sys.time()
 )
 }
 \arguments{
@@ -19,6 +20,11 @@
 the environment variable \code{GRAFANA_TOKEN}}
 
 \item{query}{Query in "PromQL"
(\href{https://prometheus.io/docs/prometheus/latest/querying/basics/}{Prometheus Query Language})}
+
+\item{time}{Date or date-time object, or character of the form
+"YYYY-MM-DD HH:MM:SS". Time components are optional. Default is current
+time (\code{Sys.time()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 }
 \value{
 List containing the response from Prometheus, in the
@@ -28,7 +34,7 @@
 Query Prometheus for an instant in time
 }
 \examples{
-\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (FALSE) withAutoprint(\{ # examplesIf}
 current_size <- query_prometheus_instant(
   query = "max(dirsize_total_size_bytes) by (directory, namespace)"
 )
diff --git a/man/query_prometheus_range.Rd b/man/query_prometheus_range.Rd
index f04eb7e..ddaab6d 100644
--- a/man/query_prometheus_range.Rd
+++ b/man/query_prometheus_range.Rd
@@ -10,7 +10,7 @@
 query_prometheus_range(
   query,
   start_time = end_time - 30,
   end_time = Sys.Date(),
-  step
+  step = "1h0m0s"
 )
 }
 \arguments{
@@ -25,14 +25,16 @@
 the environment variable \code{GRAFANA_TOKEN}}
 
 \item{start_time}{Start of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is \code{end_time} - 30 days.}
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
 \item{end_time}{End of time range to query. Date or date-time object, or
 character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
-Default is today (\code{Sys.Date()})}
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
 
 \item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
-day would be \code{"24h0m0s"}.}
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
 }
 \value{
 List containing the response from Prometheus, in the
@@ -42,7 +44,7 @@
 Adapted from https://hackmd.io/NllqOUfaTLCXcDQPipr4rg
 }
 \examples{
-\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (FALSE) withAutoprint(\{ # examplesIf}
 query_prometheus_range(
   query = "max(dirsize_total_size_bytes) by (directory, namespace)",
   start_time = "2024-01-01",
diff --git a/man/user_cpu_requests.Rd b/man/user_cpu_requests.Rd
new file mode 100644
index 0000000..45a4643
--- /dev/null
+++ b/man/user_cpu_requests.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{user_cpu_requests}
+\alias{user_cpu_requests}
+\title{Query user CPU requests from Grafana}
+\usage{
+user_cpu_requests(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{start_time}{Start of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{end_time}{End of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
+}
+\value{
+User CPU request data.
+}
+\description{
+Query user CPU requests from Grafana. This gives the CPU requests
+by a user, in addition to the instance type and container image they are using,
+by the specified time step for a given time range.
+}
diff --git a/man/user_cpu_usage.Rd b/man/user_cpu_usage.Rd
new file mode 100644
index 0000000..4fc9650
--- /dev/null
+++ b/man/user_cpu_usage.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{user_cpu_usage}
+\alias{user_cpu_usage}
+\title{Query user CPU usage from Grafana}
+\usage{
+user_cpu_usage(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{start_time}{Start of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{end_time}{End of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
+}
+\value{
+A data frame containing user CPU usage.
+}
+\description{
+Query user CPU usage from Grafana. This gives the actual CPU usage (as a percentage)
+by a user, in addition to the user pod and hub namespace,
+by the specified time step for a given time range.
+}
diff --git a/man/user_dir_snapshot.Rd b/man/user_dir_snapshot.Rd
new file mode 100644
index 0000000..cf05d9a
--- /dev/null
+++ b/man/user_dir_snapshot.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{user_dir_snapshot}
+\alias{user_dir_snapshot}
+\title{Get user directory information}
+\usage{
+user_dir_snapshot(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  time = Sys.time()
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{time}{Date or date-time object, or character of the form
+"YYYY-MM-DD HH:MM:SS". Time components are optional. Default is current
+time (\code{Sys.time()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+}
+\value{
+A data frame of directory information:
+\itemize{
+\item \code{namespace}: Hub Namespace (prod, staging, workshop)
+\item \code{directory}: User directory
+\item \code{last_accessed}: Date of last access
+\item \code{dirsize_mb}: Size of directory in MB
+\item \code{n_files}: Number of files
+\item \code{percent_total_size}: Percentage of total directory size
+}
+}
+\description{
+Get user directory information
+}
diff --git a/man/user_mem_requests.Rd b/man/user_mem_requests.Rd
new file mode 100644
index 0000000..62900ef
--- /dev/null
+++ b/man/user_mem_requests.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{user_mem_requests}
+\alias{user_mem_requests}
+\title{Query user memory requests from Grafana}
+\usage{
+user_mem_requests(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{start_time}{Start of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{end_time}{End of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
+}
+\value{
+User memory request data.
+}
+\description{
+Query user memory requests from Grafana. This gives the memory requests
+by a user, in addition to the instance type and container image they are using,
+by the specified time step for a given time range.
+}
diff --git a/man/user_mem_usage.Rd b/man/user_mem_usage.Rd
new file mode 100644
index 0000000..ab0df5c
--- /dev/null
+++ b/man/user_mem_usage.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prometheus-usage-summaries.R
+\name{user_mem_usage}
+\alias{user_mem_usage}
+\title{Query user memory usage from Grafana}
+\usage{
+user_mem_usage(
+  grafana_url = "https://grafana.openscapes.2i2c.cloud",
+  grafana_token = Sys.getenv("GRAFANA_TOKEN"),
+  start_time = end_time - 30,
+  end_time = Sys.Date(),
+  step = "0h10m0s"
+)
+}
+\arguments{
+\item{grafana_url}{URL of the Grafana instance. Default
+\verb{"https://grafana.openscapes.2i2c.cloud"}}
+
+\item{grafana_token}{Authentication token for Grafana. By default reads from
+the environment variable \code{GRAFANA_TOKEN}}
+
+\item{start_time}{Start of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is \code{end_time} - 30 days. If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{end_time}{End of time range to query. Date or date-time object, or
+character of the form "YYYY-MM-DD HH:MM:SS". Time components are optional.
+Default is today (\code{Sys.Date()}). If a \code{POSIXt} object, it will be converted
+to UTC; if a \code{Date} or \code{character} object, it will be assumed to be UTC.}
+
+\item{step}{Time step in seconds, or a string formatted as \code{"*h*m*s"} Eg., 1
+day would be \code{"24h0m0s"}. Default is 1 hour (\code{"1h0m0s"})}
+}
+\value{
+A data frame containing pod memory usage.
+}
+\description{
+Query user memory usage from Grafana. This gives the actual memory usage (in MB)
+by a user, in addition to the user pod and hub namespace,
+by the specified time step for a given time range.
+}
diff --git a/tests/testthat/_snaps/cost-summaries.md b/tests/testthat/_snaps/cost-summaries.md
index 957809d..095072d 100644
--- a/tests/testthat/_snaps/cost-summaries.md
+++ b/tests/testthat/_snaps/cost-summaries.md
@@ -30,3 +30,69 @@
       Error in `match.arg()`:
       ! 'arg' should be one of "unblended", "blended", "all"
 
+---
+
+    Code
+      get_daily_usage_costs(hub = "invalid")
+    Condition
+      Error in `match.arg()`:
+      ! 'arg' should be one of "all", "prod", "staging", "workshop", "support"
+
+---
+
+    Code
+      get_daily_usage_costs(cluster = "invalid")
+    Condition
+      Error in `match.arg()`:
+      ! 
'arg' should be one of "openscapeshub", "nmfs-openscapes" + +# get_daily_usage_costs correctly combines cluster and hub filters + + Code + filter_str + Output + [1] "list(And = list(list(Dimensions = list(Key = \"RECORD_TYPE\", Values = list(" + [2] " \"Usage\"))), list(Tags = list(Key = \"2i2c:hub-name\", Values = list(" + [3] " \"prod\"), MatchOptions = list(\"EQUALS\")))))" + +# get_daily_usage_costs filter structure is correct for support hub + + Code + deparse(filter_capture) + Output + [1] "list(And = list(list(Dimensions = list(Key = \"RECORD_TYPE\", Values = list(" + [2] " \"Usage\"))), list(Tags = list(Key = \"2i2c:hub-name\", MatchOptions = list(" + [3] " \"ABSENT\")))))" + +# get_daily_usage_costs cluster filter includes all required tag keys + + Code + deparse(filter_capture) + Output + [1] "list(And = list(list(Dimensions = list(Key = \"RECORD_TYPE\", Values = list(" + [2] " \"Usage\"))), list(Or = list(list(Tags = list(Key = \"alpha.eksctl.io/cluster-name\", " + [3] " Values = list(\"openscapeshub\"), MatchOptions = list(\"EQUALS\"))), " + [4] " list(Tags = list(Key = \"kubernetes.io/cluster/openscapeshub\", " + [5] " Values = list(\"owned\"), MatchOptions = list(\"EQUALS\"))), " + [6] " list(Tags = list(Key = \"2i2c.org/cluster-name\", Values = list(" + [7] " \"openscapeshub\"), MatchOptions = list(\"EQUALS\"))), list(" + [8] " Not = list(Tags = list(Key = \"2i2c:hub-name\", MatchOptions = list(" + [9] " \"ABSENT\")))), list(Not = list(Tags = list(Key = \"2i2c:node-purpose\", " + [10] " MatchOptions = list(\"ABSENT\"))))))))" + +--- + + Code + deparse(filter_capture) + Output + [1] "list(And = list(list(Dimensions = list(Key = \"RECORD_TYPE\", Values = list(" + [2] " \"Usage\"))), list(Or = list(list(Tags = list(Key = \"alpha.eksctl.io/cluster-name\", " + [3] " Values = list(\"nmfs-openscapes\"), MatchOptions = list(\"EQUALS\"))), " + [4] " list(Tags = list(Key = \"kubernetes.io/cluster/nmfs-openscapes\", " + [5] " Values = list(\"owned\"), MatchOptions = list(\"EQUALS\"))), " + [6] " list(Tags = list(Key = \"2i2c.org/cluster-name\", Values = list(" + [7] " \"nmfs-openscapes\"), MatchOptions = list(\"EQUALS\"))), " + [8] " list(Not = list(Tags = list(Key = \"2i2c:hub-name\", MatchOptions = list(" + [9] " \"ABSENT\")))), list(Not = list(Tags = list(Key = \"2i2c:node-purpose\", " + [10] " MatchOptions = list(\"ABSENT\"))))))))" + diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md index 95fccba..df0a2f8 100644 --- a/tests/testthat/_snaps/utils.md +++ b/tests/testthat/_snaps/utils.md @@ -46,3 +46,35 @@ Error: ! `my_date` must be a length 1 Date or POSIXt object, or character in a standard unambiguous date format +# time_string errors on invalid inputs + + Code + time_string("not a date") + Condition + Error in `as.POSIXlt.character()`: + ! character string is not in a standard unambiguous format + +--- + + Code + time_string(NULL) + Condition + Error: + ! `NULL` must be a length 1 Date or POSIXt object, or character in a standard unambiguous date format + +--- + + Code + time_string(NA) + Condition + Error: + ! `NA` must be a length 1 Date or POSIXt object, or character in a standard unambiguous date format + +--- + + Code + time_string(42) + Condition + Error: + ! 
`42` must be a length 1 Date or POSIXt object, or character in a standard unambiguous date format + diff --git a/tests/testthat/test-cost-summaries.R b/tests/testthat/test-cost-summaries.R index 58f6950..7d3a73d 100644 --- a/tests/testthat/test-cost-summaries.R +++ b/tests/testthat/test-cost-summaries.R @@ -18,6 +18,16 @@ test_that("get_daily_usage_costs input validation works", { error = TRUE, get_daily_usage_costs(cost_type = "invalid") ) + + expect_snapshot( + error = TRUE, + get_daily_usage_costs(hub = "invalid") + ) + + expect_snapshot( + error = TRUE, + get_daily_usage_costs(cluster = "invalid") + ) }) test_that("get_daily_usage_costs returns expected format", { @@ -74,7 +84,15 @@ test_that("get_daily_usage_costs() works for real with nasa env vars", { expect_s3_class(ret, "data.frame") expect_named( ret, - c("id", "date", "service", "linked_account", "cost", "acronym") + c( + "id", + "date", + "service", + "linked_account", + "cost", + "acronym", + "service_component" + ) ) expect_gt(nrow(ret), 0) }) @@ -87,7 +105,198 @@ test_that("get_daily_usage_costs() works for real with nmfs env vars", { expect_s3_class(ret, "data.frame") expect_named( ret, - c("id", "date", "service", "linked_account", "cost", "acronym") + c( + "id", + "date", + "service", + "linked_account", + "cost", + "acronym", + "service_component" + ) ) expect_gt(nrow(ret), 0) }) + +test_that("get_daily_usage_costs correctly combines cluster and hub filters", { + filter_capture <- NULL + mock_fn <- function(start_date, end_date, filter) { + filter_capture <<- filter + data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = "Amazon EC2", + linked_account = "123", + cost = 10 + ) + } + + local_mocked_bindings( + "aws_billing" = mock_fn, + .package = "sixtyfour" + ) + + # Test that cluster filter and hub filter are both applied + get_daily_usage_costs(cluster = "nmfs-openscapes", hub = "prod") + + filter_str <- deparse(filter_capture) + + # Should have both cluster name and hub name in filter + expect_snapshot(filter_str) +}) + +test_that("get_daily_usage_costs filter structure is correct for support hub", { + filter_capture <- NULL + mock_fn <- function(start_date, end_date, filter) { + filter_capture <<- filter + data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = "Amazon EC2", + linked_account = "123", + cost = 10 + ) + } + + local_mocked_bindings( + "aws_billing" = mock_fn, + .package = "sixtyfour" + ) + + # Test shared hub uses ABSENT match option + get_daily_usage_costs(hub = "support") + + expect_snapshot(deparse(filter_capture)) +}) + +test_that("get_daily_usage_costs cluster filter includes all required tag keys", { + filter_capture <- NULL + mock_fn <- function(start_date, end_date, filter) { + filter_capture <<- filter + data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = "Amazon EC2", + linked_account = "123", + cost = 10 + ) + } + + local_mocked_bindings( + "aws_billing" = mock_fn, + .package = "sixtyfour" + ) + + # Test openscapes cluster + get_daily_usage_costs(cluster = "openscapeshub") + + expect_snapshot(deparse(filter_capture)) + + # Test nmfs-openscapes cluster + get_daily_usage_costs(cluster = "nmfs-openscapes") + + expect_snapshot(deparse(filter_capture)) +}) + +test_that("get_daily_usage_costs service_component mapping handles all known services", { + mock_data <- data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = c( + "AWS Backup", + "EC2 - Other", + "Amazon Elastic Compute Cloud - Compute", + "Amazon Elastic 
Container Service for Kubernetes", + "Amazon Elastic File System", + "Amazon Elastic Load Balancing", + "Amazon Simple Storage Service", + "Amazon Virtual Private Cloud" + ), + linked_account = "123", + cost = seq(10, 80, 10) + ) + + local_mocked_bindings( + "aws_billing" = function(...) mock_data, + .package = "sixtyfour" + ) + + result <- get_daily_usage_costs() + + expect_equal( + result$service_component[result$service == "AWS Backup"], + "backup" + ) + expect_equal( + result$service_component[result$service == "EC2 - Other"], + "compute" + ) + expect_equal( + result$service_component[ + result$service == "Amazon Elastic Compute Cloud - Compute" + ], + "compute" + ) + expect_equal( + result$service_component[ + result$service == "Amazon Elastic Container Service for Kubernetes" + ], + "fixed" + ) + expect_equal( + result$service_component[result$service == "Amazon Elastic File System"], + "home storage" + ) + expect_equal( + result$service_component[result$service == "Amazon Elastic Load Balancing"], + "networking" + ) + expect_equal( + result$service_component[result$service == "Amazon Simple Storage Service"], + "object storage" + ) + expect_equal( + result$service_component[result$service == "Amazon Virtual Private Cloud"], + "networking" + ) +}) + +test_that("get_daily_usage_costs service_component defaults to 'other' for unmapped services", { + mock_data <- data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = c("Unknown Service 1", "Random Service", "Mystery Service"), + linked_account = "123", + cost = c(5, 10, 15) + ) + + local_mocked_bindings( + "aws_billing" = function(...) mock_data, + .package = "sixtyfour" + ) + + result <- get_daily_usage_costs() + + expect_true(all(result$service_component == "other")) + expect_equal(nrow(result), 3) +}) + +test_that("get_daily_usage_costs includes service_component column in output", { + mock_data <- data.frame( + id = "unblended", + date = as.Date("2024-01-01"), + service = "Amazon EC2", + linked_account = "123", + cost = 10 + ) + + local_mocked_bindings( + "aws_billing" = function(...) 
mock_data, + .package = "sixtyfour" + ) + + result <- get_daily_usage_costs() + + expect_true("service_component" %in% names(result)) + expect_type(result$service_component, "character") +}) diff --git a/tests/testthat/test-prometheus-usage-summaries.R b/tests/testthat/test-prometheus-usage-summaries.R index 054841f..96847b2 100644 --- a/tests/testthat/test-prometheus-usage-summaries.R +++ b/tests/testthat/test-prometheus-usage-summaries.R @@ -53,3 +53,218 @@ test_that("get_hourly_users() works with nmfs", { expect_s3_class(ret, "data.frame") expect_named(ret, c("date_time", "namespace", "n_users")) }) + +test_that("user_dir_snapshot() works with nasa", { + set_env_vars("nasa") + skip_if_env_vars_not_set() + skip_if_offline() + + ret <- user_dir_snapshot() + + expect_s3_class(ret, "data.frame") + expect_named( + ret, + c( + "date", + "namespace", + "directory", + "last_accessed", + "n_files", + "dirsize_mb", + "percent_total_size" + ) + ) +}) + +test_that("user_dir_snapshot() works with nmfs", { + set_env_vars("nmfs") + skip_if_env_vars_not_set() + skip_if_offline() + + ret <- user_dir_snapshot( + grafana_url = "https://grafana.nmfs-openscapes.2i2c.cloud" + ) + + expect_s3_class(ret, "data.frame") + expect_named( + ret, + c( + "date", + "namespace", + "directory", + "last_accessed", + "n_files", + "dirsize_mb", + "percent_total_size" + ) + ) +}) + +test_that("user_dir_snapshot works with mocked responses", { + mock_date_response <- list( + data = list( + result = list( + metric = data.frame(namespace = "test", directory = "user-2ddir"), + value = list(c("1704067200", "123.45")) + ) + ) + ) + + mock_size_response <- list( + data = list( + result = list( + metric = data.frame(namespace = "test", directory = "user-2ddir"), + value = list(c("1704067200", "1000000")) + ) + ) + ) + + mock_files_response <- list( + data = list( + result = list( + metric = data.frame(namespace = "test", directory = "user-2ddir"), + value = list(c("1704067200", "100")) + ) + ) + ) + + local_mocked_bindings( + req_perform = function(...) structure(list(), class = "httr2_response"), + resp_body_json = function(...) { + parent <- parent.frame() + if (grepl("mtime", parent$query)) return(mock_date_response) + if (grepl("size_bytes", parent$query)) return(mock_size_response) + if (grepl("entries_count", parent$query)) return(mock_files_response) + }, + resp_check_status = function(x) x, + .package = "httr2" + ) + + local_mocked_bindings( + get_default_prometheus_uid = function(...) 
"foo" + ) + + result <- user_dir_snapshot(time = as.POSIXct("2024-01-01")) + + expect_s3_class(result, "data.frame") + expect_named( + result, + c( + "date", + "namespace", + "directory", + "last_accessed", + "n_files", + "dirsize_mb", + "percent_total_size" + ) + ) + expect_equal(result$namespace, "test") + expect_equal(result$directory, "user-dir") + expect_equal(result$n_files, 100) + expect_equal(result$dirsize_mb, 1) + expect_equal(result$percent_total_size, 100) +}) + +test_that("dir_sizes() works with nasa", { + set_env_vars("nasa") + skip_if_env_vars_not_set() + skip_if_offline() + + ret <- dir_sizes(start_time = "2025-01-01", end_time = "2025-01-10") + + expect_s3_class(ret, "data.frame") + expect_named(ret, c("namespace", "date", "dirsize_mb")) +}) + +test_that("dir_sizes() works with nmfs", { + set_env_vars("nmfs") + skip_if_env_vars_not_set() + skip_if_offline() + + ret <- dir_sizes( + grafana_url = "https://grafana.nmfs-openscapes.2i2c.cloud", + start_time = "2025-01-01", + end_time = "2025-01-10" + ) + + expect_s3_class(ret, "data.frame") + expect_named(ret, c("namespace", "date", "dirsize_mb")) +}) + +test_that("dir_sizes() works with by_user = TRUE", { + mock_response <- list( + data = list( + result = list( + metric = data.frame(namespace = "test", directory = "user-2ddir"), + values = list( + data.frame( + V1 = 1704067200, + V2 = "1000000" + ) + ) + ) + ) + ) + + local_mocked_bindings( + req_perform = function(...) structure(list(), class = "httr2_response"), + resp_body_json = function(...) mock_response, + resp_check_status = function(x) x, + .package = "httr2" + ) + + local_mocked_bindings( + get_default_prometheus_uid = function(...) "foo" + ) + + result <- dir_sizes( + by_user = TRUE, + start_time = as.POSIXct("2024-01-01"), + end_time = as.POSIXct("2024-01-01") + ) + + expect_s3_class(result, "data.frame") + expect_named(result, c("namespace", "directory", "date", "dirsize_mb")) + expect_equal(result$namespace, "test") + expect_equal(result$directory, "user-dir") + expect_equal(result$dirsize_mb, 1) +}) + +test_that("dir_sizes() works with by_user = FALSE", { + mock_response <- list( + data = list( + result = list( + metric = data.frame(namespace = "test"), + values = list( + data.frame( + V1 = 1704067200, + V2 = "1000000" + ) + ) + ) + ) + ) + + local_mocked_bindings( + req_perform = function(...) structure(list(), class = "httr2_response"), + resp_body_json = function(...) mock_response, + resp_check_status = function(x) x, + .package = "httr2" + ) + + local_mocked_bindings( + get_default_prometheus_uid = function(...) "foo" + ) + + result <- dir_sizes( + by_user = FALSE, + start_time = as.POSIXct("2024-01-01"), + end_time = as.POSIXct("2024-01-01") + ) + + expect_s3_class(result, "data.frame") + expect_named(result, c("namespace", "date", "dirsize_mb")) + expect_equal(result$namespace, "test") + expect_equal(result$dirsize_mb, 1) +}) diff --git a/tests/testthat/test-prometheus.R b/tests/testthat/test-prometheus.R index b418fde..58fbf5b 100644 --- a/tests/testthat/test-prometheus.R +++ b/tests/testthat/test-prometheus.R @@ -14,7 +14,7 @@ test_that("get_default_prometheus_uid works", { }) test_that("get_prometheus_labels works", { - mock_response <- c("label1", "label2") + mock_response <- list(data = c("label1", "label2")) local_mocked_bindings( req_perform = function(...) structure(list(), class = "httr2_response"), @@ -27,7 +27,7 @@ test_that("get_prometheus_labels works", { get_default_prometheus_uid = function(...) 
"foo" ) - expect_equal(get_prometheus_labels(), mock_response) + expect_equal(get_prometheus_labels(), mock_response$data) }) test_that("get_prometheus_metrics works", { @@ -57,7 +57,7 @@ test_that("get_prometheus_metrics works", { expect_equal(names(result), c("col1", "metric", "type", "help", "unit")) }) -test_that("create_range_df works with provided data", { +test_that("format_prom_result works with provided data", { input <- list( data = list( result = list( @@ -72,7 +72,7 @@ test_that("create_range_df works with provided data", { ) ) - result <- create_range_df(input, "test_value") + result <- format_prom_result(as.prom_range(input), "test_value") expect_equal(names(result), c("job", "date", "test_value")) expect_s3_class(result$date, "POSIXct") diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index fb65b78..a9442a3 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -29,3 +29,26 @@ test_that("check_valid_date errors informatively for invalid inputs", { test_that("check_valid_date includes argument name in error", { expect_snapshot(check_valid_date("not a date", arg = "my_date"), error = TRUE) }) + +test_that("time_string formats dates correctly for prometheus", { + test_datetime <- as.POSIXct("2024-01-01 12:34:56", tz = "UTC") + test_date <- as.Date("2024-01-01") + test_posixlt <- as.POSIXlt("2024-01-01 12:34:56", tz = "UTC") + + expect_equal(time_string(test_datetime), "2024-01-01T12:34:56Z") + expect_equal(time_string(test_date), "2024-01-01T00:00:00Z") + expect_equal(time_string(test_posixlt), "2024-01-01T12:34:56Z") + expect_equal(time_string("2024-01-01 12:34:56"), "2024-01-01T12:34:56Z") +}) + +test_that("time_string handles timezone conversion correctly", { + est_time <- as.POSIXct("2024-01-01 07:00:00", tz = "America/New_York") + expect_equal(time_string(est_time), "2024-01-01T12:00:00Z") +}) + +test_that("time_string errors on invalid inputs", { + expect_snapshot(error = TRUE, time_string("not a date")) + expect_snapshot(error = TRUE, time_string(NULL)) + expect_snapshot(error = TRUE, time_string(NA)) + expect_snapshot(error = TRUE, time_string(42)) +})