Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
536a5d4
Fix tests
ateucher Mar 5, 2025
01826b4
Format prometheus results for range and instant
ateucher Mar 5, 2025
4d1d9ca
Add time arg to instant prom queries
ateucher Mar 6, 2025
3ea256c
Start user_dir_info
ateucher Mar 6, 2025
baf76b0
Add more columns to user_dir_info()
ateucher Mar 6, 2025
fb438bd
document
ateucher Mar 6, 2025
7226be5
Use air
ateucher Mar 11, 2025
6b4093a
documentation
ateucher Mar 11, 2025
4334764
reorder columns, unsanitize directory names
ateucher Mar 11, 2025
7743c72
document
ateucher Mar 11, 2025
8a4418b
Add tests for usre_dir_info()
ateucher Mar 11, 2025
672402b
dir_sizes() to query home directory sizes over time
ateucher Mar 12, 2025
f50a64a
type checking with cli
ateucher Mar 12, 2025
7b905bc
change user_dir_info() -> user_dir_snapshot()
ateucher Mar 12, 2025
01b4fa0
update tests
ateucher Mar 12, 2025
d0ba5e2
document
ateucher Mar 12, 2025
0edd8ac
use .data
ateucher Mar 12, 2025
9bd8de4
Pass value_fn to format_prom_df
ateucher Mar 12, 2025
8945267
Tests for dir_sizes
ateucher Mar 12, 2025
d8ff3d2
Set default time step
ateucher Apr 7, 2025
5c60d08
Fix sum vs max when by_user = FALSE
ateucher Apr 7, 2025
585fd56
User CPU and memory requests and usage
ateucher Apr 17, 2025
9eb4841
document
ateucher Apr 21, 2025
bb4ab26
Add ability to filter hub by tag in ce
ateucher Apr 22, 2025
9148f88
return all pods in user_cpu and mem_requests
ateucher Apr 22, 2025
e3d287a
Add filtering and service mapping from 2i2c
ateucher Apr 22, 2025
944f768
document
ateucher Apr 22, 2025
e16fb65
Fix list construction for different clusters
ateucher May 8, 2025
a4737b2
scratch code
ateucher Oct 7, 2025
94eef49
Update NASA hub name
ateucher Oct 7, 2025
0616bd0
Change shared to support hub name, add function to query hub names
ateucher Oct 8, 2025
8a57758
tests
ateucher Oct 8, 2025
513ebe8
document
ateucher Oct 8, 2025
d521277
Add NEWS.md
ateucher Oct 8, 2025
3bf144e
Increment version number to 0.1.0
ateucher Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
^LICENSE\.md$
^\.github$
^README\.Rmd$
^[\.]?air\.toml$
^\.vscode$
5 changes: 5 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"recommendations": [
"Posit.air-vscode"
]
}
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"[r]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "Posit.air-vscode"
}
}
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: jupycost
Title: Monitor costs and usage of AWS-hosted 2i2c JupyterHubs
Version: 0.0.0.9000
Version: 0.1.0
Authors@R: c(
person("Andy", "Teucher", , "andy.teucher@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7840-692X")),
Expand All @@ -12,7 +12,7 @@ Description: Functions to query the AWS Cost Explorer API and Prometheus to
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
URL: https://github.com/Openscapes/jupycost
BugReports: https://github.com/Openscapes/jupycost/issues
Depends:
Expand Down
12 changes: 11 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# Generated by roxygen2: do not edit by hand

S3method(format_prom_result,default)
S3method(format_prom_result,prom_instant)
S3method(format_prom_result,prom_range)
export(aws_ce_palette)
export(ce_categories)
export(ce_to_df)
export(create_range_df)
export(dir_sizes)
export(format_prom_result)
export(get_daily_usage_costs)
export(get_daily_users)
export(get_hourly_users)
export(get_prometheus_labels)
export(get_prometheus_metrics)
export(query_hub_names)
export(query_prometheus_instant)
export(query_prometheus_range)
export(unsanitize_dir_names)
export(user_cpu_requests)
export(user_cpu_usage)
export(user_dir_snapshot)
export(user_mem_requests)
export(user_mem_usage)
importFrom(rlang,":=")
importFrom(rlang,.data)
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# jupycost 0.1.0

## New Features

* Added `user_dir_snapshot()` function for retrieving user directory information at a point in time

* Added `dir_sizes()` function to query home directory sizes over time

* Added functions to track user CPU and memory requests and usage

* Added ability to query hub names and filter hubs by tags

* Improved Prometheus query handling with S3 classes and methods for range and instant queries

* Removed `create_range_df()`; use `format_prom_result()` instead to format results from the various Prometheus query functions

## Minor Improvements

* Updated NASA hub names

* Improved documentation
92 changes: 92 additions & 0 deletions R/cost-explorer.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ ce_categories <- function(df, n_categories = 10, cost_col) {
)
}


#' AWS Cost Explorer palette
#'
#' @param n number of categories
Expand All @@ -142,3 +143,94 @@ aws_ce_palette <- function(n) {

rev(pal[seq(1, n)])
}

# Lookup table from AWS Cost Explorer service names to coarse service
# components.
#
# Returns a named character vector: names are the service names as they appear
# in Cost Explorer results, values are the component each service is grouped
# under. Services missing from this table are not handled here; callers decide
# what to do with unmatched names.
ce_service_map <- function() {
  components <- c(
    "backup",
    "compute",
    "compute",
    "fixed",
    "home storage",
    "networking",
    "object storage",
    "networking"
  )

  # Same order as `components`: one service name per component entry.
  names(components) <- c(
    "AWS Backup",
    "EC2 - Other",
    "Amazon Elastic Compute Cloud - Compute",
    "Amazon Elastic Container Service for Kubernetes",
    "Amazon Elastic File System",
    "Amazon Elastic Load Balancing",
    "Amazon Simple Storage Service",
    "Amazon Virtual Private Cloud"
  )

  components
}

# Build the Cost Explorer filter expression for costs attributable to a
# cluster.
#
# `cluster` must be one of "openscapeshub" (the default) or "nmfs-openscapes".
# It is resolved with `match.arg()`, so calling the function with no argument
# selects the first choice instead of embedding the whole vector of choices in
# the filter.
#
# Returns a list with a single `Or` element holding five clauses: three
# tag-equality clauses keyed on the cluster name, and two clauses that match
# when the `2i2c:hub-name` / `2i2c:node-purpose` tags are not absent.
ce_filter_attributable_costs <- function(
  cluster = c("openscapeshub", "nmfs-openscapes")
) {
  cluster <- match.arg(cluster)

  # Clause: tag `key` equals `value`.
  tag_equals <- function(key, value) {
    list(
      Tags = list(
        Key = key,
        Values = list(value),
        MatchOptions = list("EQUALS")
      )
    )
  }

  # Clause: tag `key` is present (expressed as NOT ABSENT).
  tag_present <- function(key) {
    list(
      Not = list(
        Tags = list(
          Key = key,
          MatchOptions = list("ABSENT")
        )
      )
    )
  }

  list(
    # ref: https://github.com/2i2c-org/infrastructure/issues/4787#issue-2519110356
    # https://github.com/2i2c-org/infrastructure/blob/4c8fa0c264c592a50db2109a3cb9e7e540784af2/helm-charts/aws-ce-grafana-backend/mounted-files/const.py#L52
    Or = list(
      tag_equals("alpha.eksctl.io/cluster-name", cluster),
      tag_equals(paste0("kubernetes.io/cluster/", cluster), "owned"),
      tag_equals("2i2c.org/cluster-name", cluster),
      # FIXME: The inclusion of tags 2i2c:hub-name and 2i2c:node-purpose below
      #        in this filter is a patch to capture openscapes data from 1st
      #        July and up to 24th September 2024, and can be removed once
      #        that date range is considered irrelevant.
      tag_present("2i2c:hub-name"),
      tag_present("2i2c:node-purpose")
    )
  )
}

#' List hub names known to AWS Cost Explorer
#'
#' Asks Cost Explorer for the values of the `2i2c:hub-name` tag seen within
#' the given time period.
#'
#' @param start_date Start of the period; a date or date-like object that can
#'   be coerced to a string with `as.character()`.
#' @param end_date End of the period; a date or date-like object that can be
#'   coerced to a string with `as.character()`.
#'
#' @returns
#' The hub-name tag values returned by Cost Explorer, with missing or empty
#' values replaced by `"support"`.
#'
#' @export
query_hub_names <- function(start_date, end_date) {
  ce <- sixtyfour::con_ce()

  period <- list(
    Start = as.character(start_date),
    End = as.character(end_date)
  )

  # ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ce/client/get_tags.html
  tags <- ce$get_tags(TimePeriod = period, TagKey = "2i2c:hub-name")$Tags

  # Untagged costs come back as a missing/empty tag value; label them "support".
  untagged <- is.na(tags) | tags == ""
  tags[untagged] <- "support"

  tags
}
64 changes: 56 additions & 8 deletions R/cost-summaries.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#' @param end_date A Date object representing the end of the desired date range.
#' @param months_back Optional. A single integer specifying how many months back to query.
#' @param cost_type The type of costs. "unblended" (default), "blended", or "all"
#' @param hub which hub (or "all") you want costs for
#' @param cluster which cluster ("openscapeshub" or "nmfs-openscapes") you want information for. Ensure that you are authenticated to the correct AWS account using the appropriate API keys.
#'
#' @returns
#' A data frame of AWS usage costs.
Expand All @@ -11,10 +13,16 @@
get_daily_usage_costs <- function(
end_date = Sys.Date(),
months_back = 6,
cost_type = c("unblended", "blended", "all")
cost_type = c("unblended", "blended", "all"),
hub = c("all", "prod", "staging", "workshop", "support"),
cluster = c("openscapeshub", "nmfs-openscapes")
) {
end_date <- check_valid_date(end_date)

hub <- match.arg(hub)

cluster = match.arg(cluster)
Copy link

Copilot AI Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assignment operator should be '<-' instead of '=' for consistency with R style guidelines.

Suggested change
cluster = match.arg(cluster)
cluster <- match.arg(cluster)

Copilot uses AI. Check for mistakes.

if (!rlang::is_integerish(months_back) || months_back > 12) {
cli::cli_abort("{.arg months_back} must be an integer <= 12.")
}
Expand All @@ -26,21 +34,61 @@ get_daily_usage_costs <- function(
unit = "month"
)

filter_list <- list(
And = list(
list(
Dimensions = list(
Key = "RECORD_TYPE",
Values = list("Usage")
)
),
# TODO: figure out why attributable costs aren't being filtered for shared and individual hubs (only for all)
# https://github.com/2i2c-org/jupyterhub-cost-monitoring/blob/main/src/jupyterhub_cost_monitoring/query_usage.py
ce_filter_attributable_costs(cluster)
)
)

if (hub == "support") {
filter_list = list(
Copy link

Copilot AI Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assignment operator should be '<-' instead of '=' for consistency with R style guidelines.

Copilot uses AI. Check for mistakes.
And = list(
filter_list[["And"]][[1]],
list(
Tags = list(
Key = "2i2c:hub-name",
MatchOptions = list("ABSENT")
)
)
)
)
} else if (hub != "all") {
filter_list = list(
Copy link

Copilot AI Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assignment operator should be '<-' instead of '=' for consistency with R style guidelines.

Suggested change
filter_list = list(
filter_list <- list(

Copilot uses AI. Check for mistakes.
And = list(
filter_list[["And"]][[1]],
list(
Tags = list(
Key = "2i2c:hub-name",
Values = list(hub),
MatchOptions = list("EQUALS")
)
)
)
)
}

raw_daily <- sixtyfour::aws_billing(
as.character(start_date),
as.character(end_date),
filter = list(
Dimensions = list(
Key = "RECORD_TYPE",
Values = "Usage"
)
)
filter = filter_list
)

if (cost_type != "all") {
raw_daily <- dplyr::filter(raw_daily, .data$id == cost_type)
}

raw_daily |>
daily_ce <- raw_daily |>
dplyr::mutate(date = lubridate::ymd(.data$date))

daily_ce$service_component <- ce_service_map()[daily_ce$service]
daily_ce$service_component[is.na(daily_ce$service_component)] <- "other"
daily_ce
}
Loading