Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
/_site/
*.psd
.Rproj.user

# Local Netlify folder
.netlify
2 changes: 1 addition & 1 deletion _hub-usage-reporting/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Imports:
khroma,
sixtyfour,
ggtext,
jupycost
jupycost (>= 0.1.0)
Remotes:
openscapes/kyber,
openscapes/jupycost,
Expand Down
74 changes: 39 additions & 35 deletions _hub-usage-reporting/aws-usage-report-nasa.qmd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
title: "NASA Openscapes 2i2c JupyterHub\nUsage and Costs"
params:
year_month: "2025-01"
year_month: "2025-02"
subtitle: "Monthly report for `r format(lubridate::ym(params$year_month), '%B %Y')`"
format: typst
---
Expand Down Expand Up @@ -105,21 +105,14 @@ the Hub, as well as a breakdown of costs by service each month.

# https://www.paws-r-sdk.com/docs/costexplorer_get_cost_and_usage/
# https://docs.aws.amazon.com/aws-cost-management/latest/APIReference/API_GetDimensionValues.html
all_daily_costs_six_months <- sixtyfour::aws_billing(
ceiling_date(end_date %m-% months(6), unit = "month"),
all_daily_costs_six_months <- get_daily_usage_costs(
end_date,
filter = list(
Dimensions = list(
Key = "RECORD_TYPE",
Values = "Usage"
)
)
months_back = 6,
cost_type = "unblended"
)

total_monthly_usage_costs <- all_daily_costs_six_months |>
filter(id == "unblended") |>
mutate(
date = ymd(date),
start_date = floor_date(date, unit = "month"),
end_date = ceiling_date(date, unit = "month")
) |>
Expand Down Expand Up @@ -202,18 +195,17 @@ is directly correlated with the costs for "Amazon Elastic File System" in the
previous chart.

```{r monthly-storage}
monthly_size <- query_prometheus_range(
query = "max(dirsize_total_size_bytes{namespace='prod'})",
monthly_size <- dir_sizes(
start_time = floor_date(end_date, unit = "months") %m-% months(5),
end_time = end_date,
step = 60 * 60 * 24
step = 60 * 60 * 24 # daily
) |>
create_range_df(value_name = "size") |>
mutate(size = size * 1e-9)
filter(namespace == "prod") |>
mutate(dirsize_gb = dirsize_mb * 1e-3)

monthly_size |>
ggplot() +
geom_line(aes(x = date, y = size)) +
geom_line(aes(x = date, y = dirsize_gb)) +
scale_x_datetime(date_breaks = "1 month", date_labels = "%B") +
labs(
title = "Total size of user home directories in AWS EFS in the main Hub",
Expand Down Expand Up @@ -247,17 +239,13 @@ the different y axis scales in each panel. The "prod" namespace panel is broken
out by the GitHub team by which they are granted access to the Hub (Long-Term Access and NASA Champions 2024).

```{r homedir-size-by-date}
size_by_date <- query_prometheus_range(
query = "max(dirsize_total_size_bytes) by (directory, namespace)",
start_time = start_date,
size_by_date <- dir_sizes(
start_time = floor_date(end_date, unit = "months") %m-% months(5),
end_time = end_date,
step = 60 * 60 * 24
by_user = TRUE,
step = 60 * 60 * 24 # daily
) |>
create_range_df(value_name = "size") |>
mutate(
directory = unsanitize_dir_names(directory),
size = size * 1e-9
)
mutate(dirsize_gb = dirsize_mb * 1e-3)

# list_teams("nasa-openscapes")
# list_teams("nasa-openscapes-workshops")
Expand Down Expand Up @@ -297,13 +285,28 @@ size_by_date_by_team <- size_by_date |>
) |>
mutate(
team = ifelse(namespace == "workshop", "workshop", team),
directory = fct_reorder(directory, desc(size), .fun = max, .desc = TRUE)
directory = fct_reorder(
directory,
desc(dirsize_gb),
.fun = max,
.desc = TRUE
)
)

all_dirs_sum_by_date <- size_by_date_by_team |>
filter(namespace %in% c("prod", "workshop")) |>
group_by(namespace, date, team) |>
summarize(total_size_gb = sum(size)) |>
summarize(total_size_gb = sum(dirsize_gb)) |>
# Add a dummy row - this is temporary to work around the bug at:
# https://github.com/tidyverse/ggplot2/issues/6680
bind_rows(
tibble::tibble(
namespace = "workshop",
date = start_date,
team = "Other",
total_size_gb = 0
)
) |>
mutate(
team = ifelse(is.na(team) & namespace == "prod", "Other", team),
team = fct_reorder(team, desc(total_size_gb), .fun = max, .desc = TRUE)
Expand All @@ -318,14 +321,13 @@ all_dirs_sum_by_date |>
facet_grid(vars(namespace), scales = "free_y") +
theme(legend.position = "bottom", legend.title.position = "top") +
paletteer::scale_fill_paletteer_d(
"ggpomological::pomological_palette",
breaks = setdiff(unique(all_dirs_sum_by_date$team), "workshop")
"ggpomological::pomological_palette"
) +
labs(
title = "Total size of user home directories by access team and Hub namespace",
x = "Date",
y = "Size (GB)",
fill = "GitHub Team (production hub only)"
fill = "GitHub Team"
)
```

Expand All @@ -340,7 +342,7 @@ of space. When we see disproportionate amount of space used, we reach out to use
```{r homedir-size-champions}
size_by_date_by_team |>
filter(team == "NASA Champions 2024") |>
ggplot(aes(x = date, y = size, fill = directory)) +
ggplot(aes(x = date, y = dirsize_gb, fill = directory)) +
geom_area() +
paletteer::scale_fill_paletteer_d(
"khroma::soil",
Expand Down Expand Up @@ -370,9 +372,11 @@ develop policies and recommendations for Hub compute usage.
# https://www.paws-r-sdk.com/docs/costexplorer_get_cost_and_usage/
# https://docs.aws.amazon.com/aws-cost-management/latest/APIReference/API_GetDimensionValues.html

# TODO: modify ce_to_df to deal with an arbitrary number of metrics so
# we can do this in one call with `Metrics = list("UnblendedCost", "UsageQuantity")`
# and pass it to ce_to_df() once, rather than joining
# TODO: modify jupycost::get_daily_usage_costs() to allow grouping by instance type.
# Currently group_by is hardcoded to SERVICE and LINKED_ACCOUNT in sixtyfour::aws_billing(),
# which can only group by two variables, so that function would need to be modified too.
# https://github.com/Openscapes/jupycost/issues/4

ec2_instance_type_costs_usage_res <- cost_explorer$get_cost_and_usage(
TimePeriod = list(Start = start_date, End = end_date),
Granularity = "DAILY",
Expand Down
54 changes: 26 additions & 28 deletions _hub-usage-reporting/aws-usage-report-noaa.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -105,21 +105,15 @@ the Hub, as well as a breakdown of costs by service each month.

# https://www.paws-r-sdk.com/docs/costexplorer_get_cost_and_usage/
# https://docs.aws.amazon.com/aws-cost-management/latest/APIReference/API_GetDimensionValues.html
all_daily_costs_six_months <- sixtyfour::aws_billing(
ceiling_date(end_date %m-% months(6), unit = "month"),
all_daily_costs_six_months <- get_daily_usage_costs(
end_date,
filter = list(
Dimensions = list(
Key = "RECORD_TYPE",
Values = "Usage"
)
)
months_back = 6,
cost_type = "unblended",
cluster = "nmfs-openscapes"
)

total_monthly_usage_costs <- all_daily_costs_six_months |>
filter(id == "unblended") |>
mutate(
date = ymd(date),
start_date = floor_date(date, unit = "month"),
end_date = ceiling_date(date, unit = "month")
) |>
Expand All @@ -131,7 +125,7 @@ total_monthly_cost <- total_monthly_usage_costs$cost[
]

total_monthly_cost_plot <- ggplot(total_monthly_usage_costs) +
geom_line(aes(x = end_date, y = cost)) +
geom_line(aes(x = start_date, y = cost)) +
labs(
title = glue::glue(
"The total cost of all AWS Services for running the NMFS Openscapes 2i2c",
Expand Down Expand Up @@ -202,19 +196,18 @@ is directly correlated with the costs for "Amazon Elastic File System" in the
previous chart.

```{r monthly-storage}
monthly_size <- query_prometheus_range(
monthly_size <- dir_sizes(
grafana_url = "https://grafana.nmfs-openscapes.2i2c.cloud",
query = "max(dirsize_total_size_bytes{namespace='prod'})",
start_time = floor_date(end_date, unit = "months") %m-% months(5),
end_time = end_date,
step = 60 * 60 * 24
step = 60 * 60 * 24 # daily
) |>
format_prom_result(value_name = "size") |>
mutate(size = size * 1e-9)
filter(namespace == "prod") |>
mutate(dirsize_gb = dirsize_mb * 1e-3)

monthly_size |>
ggplot() +
geom_line(aes(x = date, y = size)) +
geom_line(aes(x = date, y = dirsize_gb)) +
scale_x_datetime(date_breaks = "1 month", date_labels = "%B") +
labs(
title = "Total size of user home directories in AWS EFS in the main Hub",
Expand All @@ -235,17 +228,14 @@ out by the GitHub team by which they are granted access to the Hub (Long-Term Ac

```{r homedir-size-by-date}

size_by_date <- query_prometheus_range(
size_by_date <- dir_sizes(
grafana_url = "https://grafana.nmfs-openscapes.2i2c.cloud",
query = "max(dirsize_total_size_bytes) by (directory)",
start_time = start_date,
start_time = floor_date(end_date, unit = "months") %m-% months(5),
end_time = end_date,
step = 60 * 60 * 24
by_user = TRUE,
step = 60 * 60 * 24 # daily
) |>
format_prom_result(value_name = "size") |>
mutate(
directory = unsanitize_dir_names(directory)
)
mutate(dirsize_gb = dirsize_mb * 1e-3)

# list_teams("nmfs-openscapes")

Expand Down Expand Up @@ -273,19 +263,26 @@ size_by_date_by_team <- size_by_date |>
teams,
by = join_by(directory == user)
) |>
filter(namespace %in% c("prod", "workshop")) |>
mutate(
team = case_when(
is.na(team) & directory %in% c("_shared", "deployment-service-check") ~
"shared",
namespace == "workshop" ~ "workshop",
is.na(team) ~ "No team",
.default = team
),
directory = fct_reorder(directory, desc(size), .fun = max, .desc = TRUE)
directory = fct_reorder(
directory,
desc(dirsize_gb),
.fun = max,
.desc = TRUE
)
)

all_dirs_sum_by_date <- size_by_date_by_team |>
group_by(date, team) |>
summarize(total_size_gb = sum(size)) |>
group_by(namespace, date, team) |>
summarize(total_size_gb = sum(dirsize_gb)) |>
mutate(
team = fct_reorder(team, desc(total_size_gb), .fun = max, .desc = TRUE)
)
Expand All @@ -296,6 +293,7 @@ all_dirs_sum_by_date <- size_by_date_by_team |>
all_dirs_sum_by_date |>
ggplot(aes(x = date, y = total_size_gb)) +
geom_area(aes(fill = team)) +
facet_grid(rows = vars(namespace), scales = "free_y") +
theme(legend.position = "bottom", legend.title.position = "top") +
paletteer::scale_fill_paletteer_d(
"ggpomological::pomological_palette",
Expand Down
Loading