
Commit ba0ceff

feat: add utils to manage reports

1 parent 2dad893

File tree

1 file changed: +84 -5 lines changed


R/utils.R

Lines changed: 84 additions & 5 deletions
@@ -275,7 +275,14 @@ get_forecast_reference_date <- function(date) {
   MMWRweek::MMWRweek2Date(lubridate::epiyear(date), lubridate::epiweek(date)) + 6
 }

-update_site <- function() {
+#' Update the site with the latest reports.
+#'
+#' Looks at the `reports/` directory and updates `template.md` with new reports
+#' that follow a naming convention. This is translated into `report.md`, which is
+#' then converted to `index.html` with pandoc.
+#'
+#' @param sync_to_s3 Whether to sync the reports to the S3 bucket.
+update_site <- function(sync_to_s3 = TRUE) {
   library(fs)
   library(stringr)
   # Define the directories
@@ -288,7 +295,9 @@ update_site <- function() {
   }

   # Sync the reports directory with the S3 bucket
-  aws.s3::s3sync(path = reports_dir, bucket = "forecasting-team-data", prefix = "reports-2024/", verbose = FALSE)
+  if (sync_to_s3) {
+    aws.s3::s3sync(path = reports_dir, bucket = "forecasting-team-data", prefix = "reports-2024/", verbose = FALSE)
+  }

   # Read the template file
   if (!file_exists(template_path)) {
@@ -313,8 +322,7 @@ update_site <- function() {
   # forecast date
   used_reports <- report_table %>%
     group_by(forecast_date, disease) %>%
-    arrange(generation_date) %>%
-    filter(generation_date == max(generation_date)) %>%
+    slice_max(generation_date) %>%
     ungroup() %>%
     arrange(forecast_date)

@@ -324,8 +332,9 @@ update_site <- function() {
     file_parts <- str_split(fs::path_ext_remove(file_name), "_", simplify = TRUE)
     date <- file_parts[1]
     disease <- file_parts[2]
+    generation_date <- file_parts[5]

-    report_link <- sprintf("- [%s Forecasts %s](%s)", str_to_title(disease), date, file_name)
+    report_link <- sprintf("- [%s Forecasts %s, Rendered %s](%s)", str_to_title(disease), date, generation_date, file_name)

     # Insert into Production Reports section, skipping a line
     prod_reports_index <- which(grepl("## Production Reports", report_md_content)) + 1
@@ -340,6 +349,76 @@ update_site <- function() {
   system("pandoc reports/report.md -s -o reports/index.html --css=reports/style.css --mathjax='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' --metadata pagetitle='Delphi Reports'")
 }

+#' Delete unused reports from the S3 bucket.
+#'
+#' @param dry_run If `TRUE`, only list the files that would be deleted; if `FALSE`, delete them.
+delete_extra_s3_files <- function(dry_run = TRUE) {
+  local_path <- "reports"
+  bucket <- "forecasting-team-data"
+  prefix <- "reports-2024/"
+  # Get list of local files (relative paths)
+  local_files <- list.files(local_path, recursive = TRUE)
+
+  # Get list of S3 files
+  s3_objects <- aws.s3::get_bucket(bucket, prefix = prefix)
+  s3_files <- sapply(s3_objects, function(x) x$Key)
+
+  # Find files that exist in S3 but not locally
+  # Remove prefix from s3_files for comparison
+  s3_files_clean <- gsub(prefix, "", s3_files)
+  files_to_delete <- s3_files[!(s3_files_clean %in% local_files)]
+
+  if (dry_run) {
+    message("Would delete ", length(files_to_delete), " files from S3")
+    message("Files: ", paste(files_to_delete, collapse = ", "))
+    return(invisible(files_to_delete))
+  }
+
+  # Delete each extra file
+  if (length(files_to_delete) > 0) {
+    message("Deleting ", length(files_to_delete), " files from S3")
+    for (file in files_to_delete) {
+      message("Deleting: ", file)
+      aws.s3::delete_object(file, bucket)
+    }
+  } else {
+    message("No files to delete")
+  }
+}
+
+#' Find report files that are not referenced in index.html.
+find_unused_report_files <- function() {
+  library(rvest)
+  library(fs)
+  library(stringr)
+
+  # Read all files in the reports directory
+  all_files <- dir_ls("reports", recurse = TRUE) %>%
+    path_file() # just get filenames, not full paths
+
+  # Read index.html and extract all href links
+  index_html <- read_html("reports/index.html")
+  used_files <- index_html %>%
+    html_elements("a") %>%
+    html_attr("href") %>%
+    # Add known required files like the CSS
+    c("style.css", "template.md", "report.md", "index.html", .) %>%
+    # Remove external links like "https://..." from the list
+    keep(~ !grepl("^https?://", .))
+
+  # Find files that exist but aren't referenced
+  unused_files <- setdiff(all_files, used_files)
+
+  if (length(unused_files) > 0) {
+    cat("The following files in 'reports' are not referenced in index.html:\n")
+    cat(paste("-", unused_files), sep = "\n")
+  } else {
+    cat("All files in 'reports' are referenced in index.html\n")
+  }
+
+  return(invisible(unused_files))
+}
+
 #' Ensure that forecast values are monotonically increasing
 #' in quantile order.
 sort_by_quantile <- function(forecasts) {
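
For reviewers, a minimal usage sketch of the updated entry point. This is an illustration, not part of the commit; it assumes `R/utils.R` has been sourced, the rendered reports already live in `reports/` under the expected naming convention, and AWS credentials for `aws.s3` are configured.

# Hypothetical local run: rebuild reports/report.md and reports/index.html
# without syncing to S3, e.g. to preview the index locally.
update_site(sync_to_s3 = FALSE)

# Full run: also sync reports/ to s3://forecasting-team-data/reports-2024/.
update_site()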
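The two cleanup helpers pair naturally after a site rebuild. A hedged sketch of that workflow, under the same assumptions as above:

# Hypothetical cleanup pass after regenerating the site.
# 1. List local files under reports/ that index.html no longer links to.
unused <- find_unused_report_files()

# 2. Preview which S3 objects have no local counterpart, then delete them.
delete_extra_s3_files(dry_run = TRUE)
delete_extra_s3_files(dry_run = FALSE)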
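One note on the `slice_max()` simplification inside `update_site()`: with its defaults (`n = 1`, `with_ties = TRUE`), `slice_max(generation_date)` keeps every row tied for the latest generation date in each group, matching the old `arrange()` plus `filter(generation_date == max(generation_date))` pipeline. A small illustration with made-up data:

library(dplyr)

# Hypothetical report_table with two generations of the flu report.
report_table <- tibble::tibble(
  forecast_date   = as.Date("2024-11-20"),
  disease         = c("flu", "flu", "covid"),
  generation_date = as.Date(c("2024-11-21", "2024-11-23", "2024-11-22"))
)

report_table %>%
  group_by(forecast_date, disease) %>%
  slice_max(generation_date) %>%  # latest generation per forecast_date/disease
  ungroup() %>%
  arrange(forecast_date)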
