@@ -275,7 +275,14 @@ get_forecast_reference_date <- function(date) {
275275 MMWRweek :: MMWRweek2Date(lubridate :: epiyear(date ), lubridate :: epiweek(date )) + 6
276276}
277277
278- update_site <- function () {
278+ # ' Update the site with the latest reports.
279+ # '
280+ # ' Looks at the `reports/` directory and updates `template.md` with new reports
281+ # ' that follow a naming convention. This is translated into `report.md` which is
282+ # ' then converted to `index.html` with pandoc.
283+ # '
284+ # ' @param sync_to_s3 Whether to sync the reports to the S3 bucket.
285+ update_site <- function (sync_to_s3 = TRUE ) {
279286 library(fs )
280287 library(stringr )
281288 # Define the directories
@@ -288,7 +295,9 @@ update_site <- function() {
288295 }
289296
290297 # Sync the reports directory with the S3 bucket
291- aws.s3 :: s3sync(path = reports_dir , bucket = " forecasting-team-data" , prefix = " reports-2024/" , verbose = FALSE )
298+ if (sync_to_s3 ) {
299+ aws.s3 :: s3sync(path = reports_dir , bucket = " forecasting-team-data" , prefix = " reports-2024/" , verbose = FALSE )
300+ }
292301
293302 # Read the template file
294303 if (! file_exists(template_path )) {
@@ -313,8 +322,7 @@ update_site <- function() {
313322 # forecast date
314323 used_reports <- report_table %> %
315324 group_by(forecast_date , disease ) %> %
316- arrange(generation_date ) %> %
317- filter(generation_date == max(generation_date )) %> %
325+ slice_max(generation_date ) %> %
318326 ungroup() %> %
319327 arrange(forecast_date )
320328
@@ -324,8 +332,9 @@ update_site <- function() {
324332 file_parts <- str_split(fs :: path_ext_remove(file_name ), " _" , simplify = TRUE )
325333 date <- file_parts [1 ]
326334 disease <- file_parts [2 ]
335+ generation_date <- file_parts [5 ]
327336
328- report_link <- sprintf(" - [%s Forecasts %s](%s)" , str_to_title(disease ), date , file_name )
337+ report_link <- sprintf(" - [%s Forecasts %s, Rendered %s ](%s)" , str_to_title(disease ), date , generation_date , file_name )
329338
330339 # Insert into Production Reports section, skipping a line
331340 prod_reports_index <- which(grepl(" ## Production Reports" , report_md_content )) + 1
@@ -340,6 +349,76 @@ update_site <- function() {
340349 system(" pandoc reports/report.md -s -o reports/index.html --css=reports/style.css --mathjax='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' --metadata pagetitle='Delphi Reports'" )
341350}
342351
#' Delete unused reports from the S3 bucket.
#'
#' Compares the local `reports/` directory against the `reports-2024/` prefix
#' of the `forecasting-team-data` bucket and removes any S3 object that no
#' longer has a matching local file.
#'
#' @param dry_run If `TRUE` (the default), only report which files would be
#'   deleted; set to `FALSE` to actually delete them.
#' @return Invisibly, the character vector of S3 keys that were (or, under a
#'   dry run, would be) deleted.
delete_extra_s3_files <- function(dry_run = TRUE) {
  local_path <- "reports"
  bucket <- "forecasting-team-data"
  prefix <- "reports-2024/"

  # Local files as paths relative to `local_path`.
  local_files <- list.files(local_path, recursive = TRUE)

  # Full S3 keys under the prefix. vapply is type-stable, unlike sapply,
  # which would silently return a list for an empty bucket listing.
  s3_objects <- aws.s3::get_bucket(bucket, prefix = prefix)
  s3_files <- vapply(s3_objects, function(x) x$Key, character(1))

  # Strip the leading prefix to compare against local relative paths.
  # get_bucket(prefix =) guarantees every key starts with `prefix`, so a
  # fixed-position substring is exact — an unanchored gsub(prefix, ...)
  # would also mangle keys containing the prefix text elsewhere.
  s3_files_clean <- substring(s3_files, nchar(prefix) + 1)
  files_to_delete <- s3_files[!(s3_files_clean %in% local_files)]

  if (dry_run) {
    message("Would delete ", length(files_to_delete), " files from S3")
    message("Files: ", paste(files_to_delete, collapse = ", "))
    return(invisible(files_to_delete))
  }

  if (length(files_to_delete) > 0) {
    message("Deleting ", length(files_to_delete), " files from S3")
    for (file in files_to_delete) {
      message("Deleting: ", file)
      aws.s3::delete_object(file, bucket)
    }
  } else {
    message("No files to delete")
  }
  invisible(files_to_delete)
}
388+
#' Find report files that are not referenced in index.html.
#'
#' Lists every file under `reports/` and compares the file names against the
#' `href` targets found in `reports/index.html` (plus a small set of
#' infrastructure files that are expected to exist without being linked),
#' printing any file that is never referenced.
#'
#' @return Invisibly, a character vector of unreferenced file names.
find_unused_report_files <- function() {
  library(rvest)
  library(fs)

  # All files in the reports directory (file names only, not full paths).
  all_files <- dir_ls("reports", recurse = TRUE) %>%
    path_file()

  # Every link target in index.html.
  hrefs <- read_html("reports/index.html") %>%
    html_elements("a") %>%
    html_attr("href")
  # html_attr() returns NA for <a> tags without an href; drop those along
  # with external links. (Base subsetting here — the previous purrr::keep()
  # call errored unless purrr happened to be attached elsewhere.)
  hrefs <- hrefs[!is.na(hrefs) & !grepl("^https?://", hrefs)]

  # Known required files like CSS count as "used" even though unlinked.
  used_files <- c("style.css", "template.md", "report.md", "index.html", hrefs)

  # Files that exist but aren't referenced anywhere.
  unused_files <- setdiff(all_files, used_files)

  if (length(unused_files) > 0) {
    cat("The following files in 'reports' are not referenced in index.html:\n")
    cat(paste("-", unused_files), sep = "\n")
  } else {
    cat("All files in 'reports' are referenced in index.html\n")
  }

  return(invisible(unused_files))
}
421+
343422# ' Ensure that forecast values are monotonically increasing
344423# ' in quantile order.
345424sort_by_quantile <- function (forecasts ) {
0 commit comments