From 7f7d9665b8d317fef7de102ac9f8ff04476b3475 Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Fri, 22 Aug 2025 12:13:55 -0400 Subject: [PATCH 1/8] ETT-519, ETT-520 modify pdus/gfv reports - Add `grin_gfv` Perl module which does most of the heavy lifting. - Add tests for most of the exposed functionality - FIXME: still need tests for `write_update_to_gfv` and `write_reversion_from_gfv` - Use experimental version of db-image with `feed_*` tables added - FIXME: can we remove `sql/ingest.sql` so we don't have to maintain them here? - Updated `bin/grin_gfv.pl` to `bin/new_grin_gfv.pl` - TODO run some side by side comparisons against actual data (with `noop` flag and the dangerous stuff excised) - The new one will replace the old --- bin/new_grin_gfv.pl | 117 +++++++++++++++++++++++ docker-compose.yml | 2 +- perl_lib/Database.pm | 4 + perl_lib/grin_gfv.pm | 222 +++++++++++++++++++++++++++++++++++++++++++ t/grin_gfv.t | 181 +++++++++++++++++++++++++++++++++++ 5 files changed, 525 insertions(+), 1 deletion(-) create mode 100644 bin/new_grin_gfv.pl create mode 100644 perl_lib/grin_gfv.pm create mode 100644 t/grin_gfv.t diff --git a/bin/new_grin_gfv.pl b/bin/new_grin_gfv.pl new file mode 100644 index 0000000..a4da8ff --- /dev/null +++ b/bin/new_grin_gfv.pl @@ -0,0 +1,117 @@ +#!/usr/bin/env perl + +# Does things in 2 steps. +# Each step consists of running a set of queries, +# and sending an email about what was done. +# +# Step 1: +# Updates ic/bib and und/bib rows to pdus/gfv in rights_current +# if they have viewability 'VIEW_FULL' GRIN and are not marked as CLAIMED and are not Keio. +# CLAIMED items are items with the CLAIMED flag is set, items where the rights holder has given permission +# to Google to make the item VIEW_FULL. That permission does not extend to HathiTrust. +# We don't do it for Keio because there are items that are PD in Japan that are not PD in the US (e.g. icus) + +# Step 2: +# Updates pdus/gfv rows in rights_current records +# with their previous attr&reason +# if they no longer have viewability 'VIEW_FULL' in GRIN. + +use strict; +use warnings; + +use lib "$ENV{ROOTDIR}/perl_lib"; + +use Date::Manip qw(ParseDate UnixDate); +use Getopt::Long; +use Mail::Mailer; +use ProgressTracker; +use YAML::XS; + +use Database; +use grin_gfv; + +my $noop = undef; # set with --noop +my $mailer = undef; + +# config +my $config_dir = $ENV{CONFIG_DIR} || '/usr/src/app/config'; +my $config_yaml = "$config_dir/rights.yml"; +my $config = YAML::XS::LoadFile($config_yaml); +my $rights_dir = $config->{rights}->{rights_dir}; + +GetOptions( + # skip update queries, emails, log file & tracker + 'noop=s' => \$noop, +); + +#### Step 1: UPDATE ITEMS TO FULL VIEW #### +my $tracker = ProgressTracker->new(); +$tracker->start_stage("set_pdus_gfv") unless $noop; + +my $grin_gfv = grin_gfv->new; +my $updates = $grin_gfv->updates_to_gfv; + +# Loop over the relevant items and update their attr/reason +foreach my $update (@$updates) { + unless ($noop) { + $grin_gfv->write_update_to_gfv($update); + $tracker->inc(); + } +} + +# Send first email +unless ($noop) { + $mailer = new_mailer("New ic/und but VIEW_FULL volumes"); + print $mailer $grin_gfv->updates_to_gfv_report; + $mailer->close() or warn("Couldn't send message: $!"); +} + +#### Step 2: REVERT FORMERLY VIEW_FULL ITEMS #### +$tracker->start_stage("revert_pdus_gfv") unless $noop; + +# Open file for which to record reverted barcodes +my $barcode_log = sprintf( + '%s/barcodes_%s_revert_gfv_feed', + $rights_dir, + UnixDate(ParseDate("now"), '%Y-%m-%d_%H-%M-%S') +); +open(my $fh, ">>", $barcode_log) or die("can't open $barcode_log: $!"); + +my $reversions = $grin_gfv->reversions_from_gfv; +foreach my $reversion (@$reversions) { + unless ($noop) { + print $fh "$reversion->{namespace}.$reversion->{id}\n"; + # update item in rights_current with the old attr/reason + $grin_gfv->write_reversion_from_gfv($reversion); + $tracker->inc(); + } +} +close($fh); + +unless ($noop) { + # Send the second email and we're done + $mailer = new_mailer("Old pdus/gfv volumes no longer VIEW_FULL"); + print $mailer $grin_gfv->reversions_from_gfv_report; + $mailer->close() or warn("Couldn't send message: $!"); +} + +$tracker->finalize() unless $noop; + +# The 2 emails sent only differ in subject and body, +# so we can do everything else using the same template. +sub new_mailer { + my $subject = shift; + my $mailer = new Mail::Mailer; + my $to_addr = join( + ', ', + split(' ', $ENV{'TO_ADDRESSES'}) + ); + + my $email = { + 'From' => $ENV{'FROM_ADDRESS'}, + 'Subject' => $subject, + 'To' => $to_addr + }; + + $mailer->open($email); +} diff --git a/docker-compose.yml b/docker-compose.yml index bb1c311..6d2a85a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,7 +64,7 @@ services: command: prove mariadb: - image: ghcr.io/hathitrust/db-image:latest + image: ghcr.io/hathitrust/db-image:updates_aug_2025 volumes: - ./sql/ingest.sql:/docker-entrypoint-initdb.d/999-ingest.sql - ./sql/hathifiles.sql:/docker-entrypoint-initdb.d/999-hathifiles.sql diff --git a/perl_lib/Database.pm b/perl_lib/Database.pm index 4f5c4d6..e24c2df 100644 --- a/perl_lib/Database.pm +++ b/perl_lib/Database.pm @@ -1,5 +1,9 @@ package Database; +use strict; +use warnings; +use utf8; + # Shared utility for read-only connection to Rights DB (via views in ht database) # or, for populate_rights_data.pl, read-write access to the Rights DB directly. diff --git a/perl_lib/grin_gfv.pm b/perl_lib/grin_gfv.pm new file mode 100644 index 0000000..c261b3f --- /dev/null +++ b/perl_lib/grin_gfv.pm @@ -0,0 +1,222 @@ +package grin_gfv; + +use strict; +use warnings; +use utf8; + +use POSIX qw(strftime); + +# Day Mo NN XX:YY:ZZ Year +our $REPORT_TIME_FORMAT = "%a %b %e %T %Y"; + +my $UPDATE_USER = 'libadm'; +# For rights_current update ($write_update_to_gfv_sql) +my $GFV_ATTR_ID = 9; # pdus +my $GFV_REASON_ID = 12; # gfv +# For rights_current reversion update +my $REVERSION_NOTE = 'Revert to previous attr/reason; no longer VIEW_FULL'; + + +# This gets us the items to update in update_gfv. +my $select_updates_to_gfv_sql = <<~'SQL'; + SELECT r.namespace, r.id, a.name + FROM rights_current r + INNER JOIN attributes a ON r.attr = a.id + INNER JOIN reasons e ON r.reason = e.id + INNER JOIN ht.feed_grin g ON r.id = g.id AND r.namespace = g.namespace + WHERE g.viewability = 'VIEW_FULL' AND a.name IN ('ic', 'und') AND e.name = 'bib' AND g.claimed != 'true' AND r.namespace != 'keio' + ORDER BY r.namespace, r.id +SQL + +my $select_reversions_from_gfv_sql = <<~'SQL'; + SELECT r.namespace, r.id + FROM rights_current r + INNER JOIN ht.feed_grin g ON r.id = g.id AND r.namespace = g.namespace + WHERE r.reason='12' AND (g.viewability != 'VIEW_FULL' OR g.claimed = 'true' OR r.namespace = 'keio') + ORDER BY r.namespace, r.id +SQL + +my $select_rights_log_sql = <<~'SQL'; + SELECT attr, reason, source + FROM rights_log + WHERE namespace = ? AND id = ? + ORDER BY time DESC +SQL + +my $write_update_to_gfv_sql = <<~SQL; + UPDATE rights_current + SET + attr = $GFV_ATTR_ID, + reason = $GFV_REASON_ID, + time = CURRENT_TIMESTAMP, + user = '$UPDATE_USER' + WHERE namespace = ? AND id = ? +SQL + +my $write_reversion_from_gfv_sql = <<~SQL; + UPDATE rights_current + SET + attr = ?, + reason = ?, + time = CURRENT_TIMESTAMP, + user = '$UPDATE_USER', + note = '$REVERSION_NOTE' + WHERE namespace = ? AND id = ? +SQL + +sub new { + my ($class, %args) = @_; + my $self = bless {}, $class; + my $dbh = Database::get_rights_rw_dbh; + $self->{dbh} = $dbh; + $self->{select_updates_to_gfv_sth} = $dbh->prepare($select_updates_to_gfv_sql) || die "could not prepare query: $select_updates_to_gfv_sql";; + $self->{select_reversions_from_gfv_sth} = $dbh->prepare($select_reversions_from_gfv_sql) || die "could not prepare query: $select_reversions_from_gfv_sql"; + $self->{select_rights_log_sth} = $dbh->prepare($select_rights_log_sql) || die "could not prepare query: $select_rights_log_sql"; + $self->{write_update_to_gfv_sth} = $dbh->prepare($write_update_to_gfv_sql) || die "could not prepare query: $write_update_to_gfv_sql"; + $self->{write_reversion_from_gfv_sth} = $dbh->prepare($write_reversion_from_gfv_sql) || die "could not prepare query: $write_reversion_from_gfv_sql"; + return $self; +} + +# ic/und items that should nonetheless be pdus/gfv according to GRIN +# Returns an arrayref of hashref, sorted by namespace and id +# Each hashref contains the fields {namespace, id, attr} +# e.g. { namespace => 'mdp', id => '001', attr => 'ic' } +sub updates_to_gfv { + my $self = shift; + + my $updates = []; + my $sth = $self->{select_updates_to_gfv_sth}; + $sth->execute or die $sth->errstr; + while (my $row = $sth->fetch) { + my ($namespace, $id, $attr) = @$row; + push @$updates, {namespace => $namespace, id => $id, attr => $attr}; + } + $sth->finish; + return $updates; +} + +# Extract updates_to_gfv data into e-mail report +# Optional date_string keyword arg is for testing +sub updates_to_gfv_report { + my $self = shift; + my $updates = shift; + my %args = @_; + + my $date_string = $args{date_string} || strftime($REPORT_TIME_FORMAT, localtime); + my $report = sprintf "%d volumes set to pdus/gfv at $date_string\n\n", scalar @$updates; + my $ic_section = "IC\n\n"; + my $und_section = "UND\n\n"; + foreach my $update (@$updates) { + if ($update->{attr} eq 'ic') { + $ic_section .= "$update->{namespace}.$update->{id}\n"; + } elsif ($update->{attr} eq 'und') { + $und_section .= "$update->{namespace}.$update->{id}\n"; + } else { + printf STDERR "ERROR: unknown attribute in %s\n", Dumper($update); + } + } + $report .= $ic_section . "\n"; + $report .= $und_section; + return $report; +} + +# Write a pdus/gfv update (one of the hashrefs from updates_to_gfv) to the Rights DB +sub write_update_to_gfv { + my $self = shift; + my $update = shift; + + $self->{write_update_to_gfv_sth}->execute( + $update->{namespace}, + $update->{id} + ); +} + +# pdus_gfv items that should be reverted to bib rights +# Returns an arrayref of hashref, sorted by namespace, id +# Each hashref contains the fields {namespace, id, attr, reason, source, gfv_count} +# e.g. { namespace => 'mdp', id => '001', attr => 5, reason => 8, source => 1, gfv_count => 0 } +# NOTE, the attr/reason/src are NUMERIC this time +sub reversions_from_gfv { + my $self = shift; + + my $updates = []; + my $sth = $self->{select_reversions_from_gfv_sth}; + $sth->execute or die $sth->errstr; + while (my $row = $sth->fetch) { + my ($namespace, $id) = @$row; + my $rights_log_data = $self->_rights_log_data($namespace, $id); + push @$updates, { + namespace => $namespace, + id => $id, + attr => $rights_log_data->{attr}, + reason => $rights_log_data->{reason}, + source => $rights_log_data->{source}, + gfv_count => $rights_log_data->{gfv_count} + }; + } + $sth->finish; + return $updates; +} + +# Extract reversions_from_gfv data into e-mail report +# Optional date_string keyword arg is for testing +sub reversions_from_gfv_report { + my $self = shift; + my $reversions = shift; + my %args = @_; + + my $date_string = $args{date_string} || strftime($REPORT_TIME_FORMAT, localtime); + my $report = sprintf "%d volumes reverted from pdus/gfv at $date_string\n\n", scalar @$reversions; + my $prior_section = "Has prior GFV status\n\n"; + my $no_prior_section = "No prior GFV status\n\n"; + foreach my $reversion (@$reversions) { + # There should be at least one GFV (the one we are reverting from). + # Anything more than that counts as prior GFV status. + if ($reversion->{gfv_count} > 1) { + $prior_section .= "$reversion->{namespace}.$reversion->{id}\n"; + } else { + $no_prior_section .= "$reversion->{namespace}.$reversion->{id}\n"; + } + } + $report .= $prior_section . "\n"; + $report .= $no_prior_section; + return $report; +} + +sub write_reversion_from_gfv { + my $self = shift; + my $reversion = shift; + + $self->{write_reversion_from_gfv_sth}->execute( + $reversion->{attr}, + $reversion->{reason}, + $reversion->{namespace}, + $reversion->{id} + ); +} + +# Returns data from the most recent non-gfv entry in rights_log, +# plus a count of */gfv entries (including the one we are reverting from) +# e.g., { attr => 5, reason => 8, source => 1, gfv_count => 1 } +sub _rights_log_data { + my $self = shift; + my $namespace = shift; + my $id = shift; + + my $rights_log_data = { gfv_count => 0 }; + my $sth = $self->{select_rights_log_sth}; + $sth->execute($namespace, $id) or die $sth->errstr; + while (my $row = $sth->fetch) { + my ($attr, $reason, $source) = @$row; + if ($reason == $GFV_REASON_ID) { + $rights_log_data->{gfv_count}++; + } elsif (!defined $rights_log_data->{attr}) { + $rights_log_data->{attr} = $attr; + $rights_log_data->{reason} = $reason; + $rights_log_data->{source} = $source; + } + } + return $rights_log_data; +} + +1; \ No newline at end of file diff --git a/t/grin_gfv.t b/t/grin_gfv.t new file mode 100644 index 0000000..e0e50a2 --- /dev/null +++ b/t/grin_gfv.t @@ -0,0 +1,181 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use utf8; + +use Data::Dumper; +use POSIX qw(strftime); +use Test::More; + +use lib "$ENV{ROOTDIR}/perl_lib"; +use Database; +use grin_gfv; + +my $dbh = Database::get_rights_rw_dbh; + +sub load_test_fixtures { + my $fixture_data = shift; + + my $rights_current_sql = 'INSERT INTO rights_current (namespace, id, attr, reason, source, access_profile) VALUES (?, ?, ?, ?, 1, 1)'; + # For reversion, create a matching rights_log entry + my $rights_log_sql = 'INSERT INTO rights_log (namespace, id, attr, reason, source, access_profile) VALUES (?, ?, ?, ?, 1, 1)'; + # Old rights to revert to and old VIEW_FULL cases to count + my $old_rights_log_sql = <<~'SQL'; + INSERT INTO rights_log (namespace, id, attr, reason, source, access_profile, time) + VALUES (?, ?, ?, ?, 1, 1, ?) + SQL + my $feed_grin_sql = 'INSERT INTO ht.feed_grin (namespace, id, viewability, claimed) VALUES (?, ?, ?, ?)'; + my $rights_current_sth = $dbh->prepare($rights_current_sql); + my $rights_log_sth = $dbh->prepare($rights_log_sql); + my $old_rights_log_sth = $dbh->prepare($old_rights_log_sql); + my $feed_grin_sth = $dbh->prepare($feed_grin_sql); + foreach my $fixture (@$fixture_data) { + my ($namespace, $id, $attr, $reason, $viewability, $claimed, $logs) = @$fixture; + $rights_current_sth->execute($namespace, $id, $attr, $reason); + $rights_log_sth->execute($namespace, $id, $attr, $reason); + # Insert an old ic/bib so we have something to revert to + $old_rights_log_sth->execute($namespace, $id, 2, 1, '2021-01-01 00:00:00'); + $feed_grin_sth->execute($namespace, $id, $viewability, $claimed); + # Optionally insert some old pdus/gfv entries so we can count them + foreach my $n (1..$logs) { + my $date = "2020-01-0$n 00:00:00"; + $old_rights_log_sth->execute($namespace, $id, 9, 12, $date); + } + } +} + +sub unload_test_fixtures { + $dbh->prepare('DELETE FROM rights_current WHERE namespace IN ("gfvtest","keio")')->execute; + $dbh->prepare('DELETE FROM rights_log WHERE namespace IN ("gfvtest","keio")')->execute; + $dbh->prepare('DELETE FROM ht.feed_grin WHERE namespace IN ("gfvtest","keio")')->execute; +} + +# Clean up any previously failed tests +unload_test_fixtures; + +# Qualifies if ic/und and bib and VIEW_FULL and not claimed +my $updates_test_fixtures = [ + # namespace id attr (ic=2 und=5) reason (bib=1) viewability claimed logs comment + ['gfvtest', 'ic', 2, 1, 'VIEW_FULL', 'false', 0, 'ic included'], + ['gfvtest', 'und', 5, 1, 'VIEW_FULL', 'false', 0, 'und included'], + ['gfvtest', 'pd', 1, 1, 'VIEW_FULL', 'false', 0, 'pd/* excluded'], + ['gfvtest', 'nfi', 5, 8, 'VIEW_FULL', 'false', 0, '*/nfi excluded'], + ['gfvtest', 'nonviewable', 2, 1, '', 'false', 0, 'viewability != VIEW_FULL excluded'], + ['gfvtest', 'claimed', 2, 1, 'VIEW_FULL', 'true', 0, 'claimed=true excluded'], + ['keio', 'keio', 2, 1, 'VIEW_FULL', 'false', 0, 'keio excluded'], +]; + +subtest "updates_to_gfv" => sub { + my $err; + load_test_fixtures($updates_test_fixtures); + my $grin_gfv = grin_gfv->new; + my $updates = $grin_gfv->updates_to_gfv; + my $expected = [ + { + 'attr' => 'ic', + 'id' => 'ic', + 'namespace' => 'gfvtest' + }, + { + 'attr' => 'und', + 'id' => 'und', + 'namespace' => 'gfvtest' + } + ]; + unload_test_fixtures; + is_deeply($updates, $expected); +}; + +subtest 'updates_to_gfv_report' => sub { + load_test_fixtures($updates_test_fixtures); + my $grin_gfv = grin_gfv->new; + my $date_string = strftime($grin_gfv::REPORT_TIME_FORMAT, localtime); + my $updates = $grin_gfv->updates_to_gfv; + my $report = $grin_gfv->updates_to_gfv_report($updates, date_string => $date_string); + my $expected = <<~REPORT; + 2 volumes set to pdus/gfv at $date_string + + IC + + gfvtest.ic + + UND + + gfvtest.und + REPORT + unload_test_fixtures; + is($report, $expected); +}; + + +# Qualifies if gfv and (not VIEW_FULL or claimed or keio) +# old_logs is in addition to the rights we're reverting from so the result will be old_logs + 1 for gfv_count +my $reversions_test_fixtures = [ + # namespace id attr (pdus=9) reason (gfv=12) viewability claimed old_logs comment + ['gfvtest', 'bib', 9, 1, 'VIEW_FULL', 'false', 0, 'non-gfv excluded'], + ['gfvtest', 'nonviewable', 9, 12, '', 'false', 0, 'viewability != VIEW_FULL included'], + ['gfvtest', 'claimed', 9, 12, 'VIEW_FULL', 'true', 1, 'claimed=true included'], + ['keio', 'keio', 9, 12, 'VIEW_FULL', 'false', 2, 'keio included'], +]; + +subtest "reversions_from_gfv" => sub { + load_test_fixtures($reversions_test_fixtures); + my $grin_gfv = grin_gfv->new; + my $reversions = $grin_gfv->reversions_from_gfv; + my $expected = [ + { + 'attr' => 2, + 'gfv_count' => 2, + 'id' => 'claimed', + 'namespace' => 'gfvtest', + 'reason' => 1, + 'source' => 1 + }, + { + 'attr' => 2, + 'gfv_count' => 1, + 'id' => 'nonviewable', + 'namespace' => 'gfvtest', + 'reason' => 1, + 'source' => 1 + }, + { + 'attr' => 2, + 'gfv_count' => 3, + 'id' => 'keio', + 'namespace' => 'keio', + 'reason' => 1, + 'source' => 1 + } + ]; + unload_test_fixtures; + is_deeply($reversions, $expected); +}; + +subtest 'reversions_from_gfv_report' => sub { + load_test_fixtures($reversions_test_fixtures); + my $grin_gfv = grin_gfv->new; + my $date_string = strftime($grin_gfv::REPORT_TIME_FORMAT, localtime); + my $reversions = $grin_gfv->reversions_from_gfv; + my $report = $grin_gfv->reversions_from_gfv_report($reversions, date_string => $date_string); + my $expected = <<~REPORT; + 3 volumes reverted from pdus/gfv at $date_string + + Has prior GFV status + + gfvtest.claimed + keio.keio + + No prior GFV status + + gfvtest.nonviewable + REPORT + unload_test_fixtures; + is($report, $expected); +}; + +done_testing; + +__END__ + From 09c961391ae7b17b26aa425002567d9674cf0ddc Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Mon, 25 Aug 2025 13:15:48 -0400 Subject: [PATCH 2/8] - Add tests for `write_update_to_gfv` and `write_reversion_from_gfv` --- perl_lib/grin_gfv.pm | 4 ++-- t/grin_gfv.t | 44 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/perl_lib/grin_gfv.pm b/perl_lib/grin_gfv.pm index c261b3f..fe2f4f1 100644 --- a/perl_lib/grin_gfv.pm +++ b/perl_lib/grin_gfv.pm @@ -11,8 +11,8 @@ our $REPORT_TIME_FORMAT = "%a %b %e %T %Y"; my $UPDATE_USER = 'libadm'; # For rights_current update ($write_update_to_gfv_sql) -my $GFV_ATTR_ID = 9; # pdus -my $GFV_REASON_ID = 12; # gfv +our $GFV_ATTR_ID = 9; # pdus +our $GFV_REASON_ID = 12; # gfv # For rights_current reversion update my $REVERSION_NOTE = 'Revert to previous attr/reason; no longer VIEW_FULL'; diff --git a/t/grin_gfv.t b/t/grin_gfv.t index e0e50a2..2d03f0a 100644 --- a/t/grin_gfv.t +++ b/t/grin_gfv.t @@ -14,10 +14,13 @@ use grin_gfv; my $dbh = Database::get_rights_rw_dbh; +# These are used by `write_reversion_from_gfv` and `write_reversion_from_gfv` tests, as well as `load_test_fixtures` +my $rights_current_sql = 'INSERT INTO rights_current (namespace, id, attr, reason, source, access_profile) VALUES (?, ?, ?, ?, 1, 1)'; +my $rights_current_sth = $dbh->prepare($rights_current_sql); + sub load_test_fixtures { my $fixture_data = shift; - my $rights_current_sql = 'INSERT INTO rights_current (namespace, id, attr, reason, source, access_profile) VALUES (?, ?, ?, ?, 1, 1)'; # For reversion, create a matching rights_log entry my $rights_log_sql = 'INSERT INTO rights_log (namespace, id, attr, reason, source, access_profile) VALUES (?, ?, ?, ?, 1, 1)'; # Old rights to revert to and old VIEW_FULL cases to count @@ -26,7 +29,6 @@ sub load_test_fixtures { VALUES (?, ?, ?, ?, 1, 1, ?) SQL my $feed_grin_sql = 'INSERT INTO ht.feed_grin (namespace, id, viewability, claimed) VALUES (?, ?, ?, ?)'; - my $rights_current_sth = $dbh->prepare($rights_current_sql); my $rights_log_sth = $dbh->prepare($rights_log_sql); my $old_rights_log_sth = $dbh->prepare($old_rights_log_sql); my $feed_grin_sth = $dbh->prepare($feed_grin_sql); @@ -108,6 +110,22 @@ subtest 'updates_to_gfv_report' => sub { is($report, $expected); }; +subtest 'write_update_to_gfv' => sub { + my $test_namespace = 'gfvtest'; + my $test_id = 'write_update'; + # Insert ic/bib (2/1) row for this id which we will then update. + $rights_current_sth->execute($test_namespace, $test_id, 2, 1); + my $grin_gfv = grin_gfv->new; + my $update = { + namespace => $test_namespace, + id => $test_id + }; + $grin_gfv->write_update_to_gfv($update); + my $sql = 'SELECT attr, reason FROM rights_current WHERE namespace = ? AND id = ?'; + my $row = $dbh->selectall_arrayref($sql, undef, $test_namespace, $test_id); + is($row->[0]->[0], $grin_gfv::GFV_ATTR_ID); + is($row->[0]->[1], $grin_gfv::GFV_REASON_ID); +}; # Qualifies if gfv and (not VIEW_FULL or claimed or keio) # old_logs is in addition to the rights we're reverting from so the result will be old_logs + 1 for gfv_count @@ -175,7 +193,23 @@ subtest 'reversions_from_gfv_report' => sub { is($report, $expected); }; -done_testing; - -__END__ +subtest 'write_reversion_from_gfv' => sub { + my $test_namespace = 'gfvtest'; + my $test_id = 'write_reversion'; + # Insert pdus/gfv row for this id which we will then revert. + $rights_current_sth->execute($test_namespace, $test_id, $grin_gfv::GFV_ATTR_ID, $grin_gfv::GFV_REASON_ID); + my $grin_gfv = grin_gfv->new; + my $reversion = { + attr => 2, + reason => 1, + namespace => $test_namespace, + id => $test_id + }; + $grin_gfv->write_reversion_from_gfv($reversion); + my $sql = 'SELECT attr, reason FROM rights_current WHERE namespace = ? AND id = ?'; + my $row = $dbh->selectall_arrayref($sql, undef, $test_namespace, $test_id); + is($row->[0]->[0], 2); + is($row->[0]->[1], 1); +}; +done_testing; From e20b82bc78da9202775257d36ed16e8ef01b6d9e Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Mon, 25 Aug 2025 13:31:30 -0400 Subject: [PATCH 3/8] Use simpler selectrow_array for a couple of the tests --- t/grin_gfv.t | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/t/grin_gfv.t b/t/grin_gfv.t index 2d03f0a..9fed184 100644 --- a/t/grin_gfv.t +++ b/t/grin_gfv.t @@ -113,8 +113,8 @@ subtest 'updates_to_gfv_report' => sub { subtest 'write_update_to_gfv' => sub { my $test_namespace = 'gfvtest'; my $test_id = 'write_update'; - # Insert ic/bib (2/1) row for this id which we will then update. - $rights_current_sth->execute($test_namespace, $test_id, 2, 1); + # Insert icus/gatt (19/17) row for this id which we will then update. + $rights_current_sth->execute($test_namespace, $test_id, 19, 17); my $grin_gfv = grin_gfv->new; my $update = { namespace => $test_namespace, @@ -122,9 +122,9 @@ subtest 'write_update_to_gfv' => sub { }; $grin_gfv->write_update_to_gfv($update); my $sql = 'SELECT attr, reason FROM rights_current WHERE namespace = ? AND id = ?'; - my $row = $dbh->selectall_arrayref($sql, undef, $test_namespace, $test_id); - is($row->[0]->[0], $grin_gfv::GFV_ATTR_ID); - is($row->[0]->[1], $grin_gfv::GFV_REASON_ID); + my ($attr, $reason) = $dbh->selectrow_array($sql, undef, $test_namespace, $test_id); + is($attr, $grin_gfv::GFV_ATTR_ID); + is($reason, $grin_gfv::GFV_REASON_ID); }; # Qualifies if gfv and (not VIEW_FULL or claimed or keio) @@ -196,20 +196,20 @@ subtest 'reversions_from_gfv_report' => sub { subtest 'write_reversion_from_gfv' => sub { my $test_namespace = 'gfvtest'; my $test_id = 'write_reversion'; - # Insert pdus/gfv row for this id which we will then revert. + # Insert pdus/gfv row for this id which we will then revert to icus/gatt. $rights_current_sth->execute($test_namespace, $test_id, $grin_gfv::GFV_ATTR_ID, $grin_gfv::GFV_REASON_ID); my $grin_gfv = grin_gfv->new; my $reversion = { - attr => 2, - reason => 1, + attr => 19, + reason => 17, namespace => $test_namespace, id => $test_id }; $grin_gfv->write_reversion_from_gfv($reversion); my $sql = 'SELECT attr, reason FROM rights_current WHERE namespace = ? AND id = ?'; - my $row = $dbh->selectall_arrayref($sql, undef, $test_namespace, $test_id); - is($row->[0]->[0], 2); - is($row->[0]->[1], 1); + my ($attr, $reason) = $dbh->selectrow_array($sql, undef, $test_namespace, $test_id); + is($attr, 19); + is($reason, 17); }; done_testing; From d0b9fe83ce14c8fbe0b8ead39d7f0f96126bbb8a Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Mon, 25 Aug 2025 15:16:20 -0400 Subject: [PATCH 4/8] - Pass updates/reversions structure to the `_report` methods. --- bin/new_grin_gfv.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/new_grin_gfv.pl b/bin/new_grin_gfv.pl index a4da8ff..52462b2 100644 --- a/bin/new_grin_gfv.pl +++ b/bin/new_grin_gfv.pl @@ -62,7 +62,7 @@ # Send first email unless ($noop) { $mailer = new_mailer("New ic/und but VIEW_FULL volumes"); - print $mailer $grin_gfv->updates_to_gfv_report; + print $mailer $grin_gfv->updates_to_gfv_report($updates); $mailer->close() or warn("Couldn't send message: $!"); } @@ -91,7 +91,7 @@ unless ($noop) { # Send the second email and we're done $mailer = new_mailer("Old pdus/gfv volumes no longer VIEW_FULL"); - print $mailer $grin_gfv->reversions_from_gfv_report; + print $mailer $grin_gfv->reversions_from_gfv_report($reversions); $mailer->close() or warn("Couldn't send message: $!"); } From d425a3e2b234da6d6a6267e08055b52888cb97dd Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Mon, 25 Aug 2025 15:26:27 -0400 Subject: [PATCH 5/8] Rename new grin_gfv.pl over the old one. --- bin/grin_gfv.pl | 95 +++++------------------------------ bin/new_grin_gfv.pl | 117 -------------------------------------------- 2 files changed, 11 insertions(+), 201 deletions(-) delete mode 100644 bin/new_grin_gfv.pl diff --git a/bin/grin_gfv.pl b/bin/grin_gfv.pl index b26eec1..52462b2 100644 --- a/bin/grin_gfv.pl +++ b/bin/grin_gfv.pl @@ -28,24 +28,16 @@ use YAML::XS; use Database; +use grin_gfv; my $noop = undef; # set with --noop my $mailer = undef; -my $email_body = ""; # holds the current email body -my $volcount = 0; - -# static values for queries -my $force_attr = "9"; # pdus -my $force_reason = "12"; # gfv -my $r_note = 'Revert to previous attr/reason; no longer VIEW_FULL'; -my $user = 'libadm'; # config my $config_dir = $ENV{CONFIG_DIR} || '/usr/src/app/config'; my $config_yaml = "$config_dir/rights.yml"; my $config = YAML::XS::LoadFile($config_yaml); my $rights_dir = $config->{rights}->{rights_dir}; -my $dbh = Database::get_rights_rw_dbh(); GetOptions( # skip update queries, emails, log file & tracker @@ -56,34 +48,13 @@ my $tracker = ProgressTracker->new(); $tracker->start_stage("set_pdus_gfv") unless $noop; -# This gets us the items to update in update_gfv. -my $select_gfv_sql = join( - ' ', - "SELECT r.namespace, r.id, a.name", - "FROM rights_current r", - "INNER JOIN attributes a ON r.attr = a.id", - "INNER JOIN reasons e ON r.reason = e.id", - "INNER JOIN ht.feed_grin g ON r.id = g.id AND r.namespace = g.namespace", - "WHERE g.viewability = 'VIEW_FULL' AND a.name IN ('ic', 'und') AND e.name = 'bib' AND g.claimed != 'true' AND r.namespace != 'keio'" -); - -# Takes values for 2 bind-params, in the order: namespace, id. -# user, attr & reason are static, so no bind-params for those. -my $update_gfv_sql = join( - ' ', - "UPDATE rights_current", - "SET attr = '$force_attr', reason = '$force_reason', time = CURRENT_TIMESTAMP, user = '$user'", - "WHERE namespace = ? AND id = ?" -); -my $update_gfv_sth = $dbh->prepare($update_gfv_sql) || die "could not prepare query: $update_gfv_sql"; +my $grin_gfv = grin_gfv->new; +my $updates = $grin_gfv->updates_to_gfv; # Loop over the relevant items and update their attr/reason -foreach my $row (@{$dbh->selectall_arrayref($select_gfv_sql)}) { - my ($namespace, $id, $attrname) = @$row; - $email_body .= "$namespace.$id\t$attrname\n"; - $volcount++; +foreach my $update (@$updates) { unless ($noop) { - $update_gfv_sth->execute($namespace, $id); + $grin_gfv->write_update_to_gfv($update); $tracker->inc(); } } @@ -91,46 +62,13 @@ # Send first email unless ($noop) { $mailer = new_mailer("New ic/und but VIEW_FULL volumes"); - print $mailer "$volcount volumes set to pdus/gfv at " . CORE::localtime() . "\n"; - print $mailer "$email_body"; + print $mailer $grin_gfv->updates_to_gfv_report($updates); $mailer->close() or warn("Couldn't send message: $!"); } #### Step 2: REVERT FORMERLY VIEW_FULL ITEMS #### $tracker->start_stage("revert_pdus_gfv") unless $noop; -my $select_revert_sql = join( - ' ', - "SELECT r.namespace, r.id", - "FROM rights_current r", - "INNER JOIN ht.feed_grin g ON r.id = g.id AND r.namespace = g.namespace", - "WHERE r.reason='12' AND (g.viewability != 'VIEW_FULL' OR g.claimed = 'true' OR r.namespace = 'keio')" -); - -# Takes values for 2 bind-params, in the order: id, namespace. -my $select_old_sql = join( - ' ', - "SELECT attr, reason, source", - "FROM rights_log", - "WHERE id = ? AND namespace = ? AND reason != '12'", - "ORDER BY time DESC", - "LIMIT 1" -); -my $select_old_sth = $dbh->prepare($select_old_sql) || die "could not prepare query: $select_old_sql"; - -# Takes values for 4 bind-params in the order: oldattr, oldreason, namespace, id. -# user, note & time are static so not a bind-params for them. -my $update_revert_sql = join( - ' ', - "UPDATE rights_current", - "SET attr = ?, reason = ?, user = '$user', time = CURRENT_TIMESTAMP, note = '$r_note'", - "WHERE namespace = ? AND id = ?" -); -my $update_revert_sth = $dbh->prepare($update_revert_sql) || die "could not prepare query: $update_revert_sql"; - -# Start second email -$email_body = "Reverting pdus/gfv volumes that are no longer VIEW_FULL\n"; - # Open file for which to record reverted barcodes my $barcode_log = sprintf( '%s/barcodes_%s_revert_gfv_feed', @@ -139,22 +77,12 @@ ); open(my $fh, ">>", $barcode_log) or die("can't open $barcode_log: $!"); -# This loop does a couple of things... -foreach my $row (@{$dbh->selectall_arrayref($select_revert_sql)}) { - # get the namespace and id for the item to update - my ($namespace, $id) = @$row; - - # get the item's most recent non-gfv attr/reason - $select_old_sth->execute($id, $namespace); - my ($oldattr, $oldreason) = $select_old_sth->fetchrow_array(); - +my $reversions = $grin_gfv->reversions_from_gfv; +foreach my $reversion (@$reversions) { unless ($noop) { - # append item to email body and print barcode to log file - $email_body .= "\t$namespace.$id\n"; - print $fh "$namespace.$id\n"; - + print $fh "$reversion->{namespace}.$reversion->{id}\n"; # update item in rights_current with the old attr/reason - $update_revert_sth->execute($oldattr, $oldreason, $namespace, $id); + $grin_gfv->write_reversion_from_gfv($reversion); $tracker->inc(); } } @@ -163,12 +91,11 @@ unless ($noop) { # Send the second email and we're done $mailer = new_mailer("Old pdus/gfv volumes no longer VIEW_FULL"); - print $mailer "$email_body"; + print $mailer $grin_gfv->reversions_from_gfv_report($reversions); $mailer->close() or warn("Couldn't send message: $!"); } $tracker->finalize() unless $noop; -$dbh->disconnect; # The 2 emails sent only differ in subject and body, # so we can do everything else using the same template. diff --git a/bin/new_grin_gfv.pl b/bin/new_grin_gfv.pl deleted file mode 100644 index 52462b2..0000000 --- a/bin/new_grin_gfv.pl +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env perl - -# Does things in 2 steps. -# Each step consists of running a set of queries, -# and sending an email about what was done. -# -# Step 1: -# Updates ic/bib and und/bib rows to pdus/gfv in rights_current -# if they have viewability 'VIEW_FULL' GRIN and are not marked as CLAIMED and are not Keio. -# CLAIMED items are items with the CLAIMED flag is set, items where the rights holder has given permission -# to Google to make the item VIEW_FULL. That permission does not extend to HathiTrust. -# We don't do it for Keio because there are items that are PD in Japan that are not PD in the US (e.g. icus) - -# Step 2: -# Updates pdus/gfv rows in rights_current records -# with their previous attr&reason -# if they no longer have viewability 'VIEW_FULL' in GRIN. - -use strict; -use warnings; - -use lib "$ENV{ROOTDIR}/perl_lib"; - -use Date::Manip qw(ParseDate UnixDate); -use Getopt::Long; -use Mail::Mailer; -use ProgressTracker; -use YAML::XS; - -use Database; -use grin_gfv; - -my $noop = undef; # set with --noop -my $mailer = undef; - -# config -my $config_dir = $ENV{CONFIG_DIR} || '/usr/src/app/config'; -my $config_yaml = "$config_dir/rights.yml"; -my $config = YAML::XS::LoadFile($config_yaml); -my $rights_dir = $config->{rights}->{rights_dir}; - -GetOptions( - # skip update queries, emails, log file & tracker - 'noop=s' => \$noop, -); - -#### Step 1: UPDATE ITEMS TO FULL VIEW #### -my $tracker = ProgressTracker->new(); -$tracker->start_stage("set_pdus_gfv") unless $noop; - -my $grin_gfv = grin_gfv->new; -my $updates = $grin_gfv->updates_to_gfv; - -# Loop over the relevant items and update their attr/reason -foreach my $update (@$updates) { - unless ($noop) { - $grin_gfv->write_update_to_gfv($update); - $tracker->inc(); - } -} - -# Send first email -unless ($noop) { - $mailer = new_mailer("New ic/und but VIEW_FULL volumes"); - print $mailer $grin_gfv->updates_to_gfv_report($updates); - $mailer->close() or warn("Couldn't send message: $!"); -} - -#### Step 2: REVERT FORMERLY VIEW_FULL ITEMS #### -$tracker->start_stage("revert_pdus_gfv") unless $noop; - -# Open file for which to record reverted barcodes -my $barcode_log = sprintf( - '%s/barcodes_%s_revert_gfv_feed', - $rights_dir, - UnixDate(ParseDate("now"), '%Y-%m-%d_%H-%M-%S') -); -open(my $fh, ">>", $barcode_log) or die("can't open $barcode_log: $!"); - -my $reversions = $grin_gfv->reversions_from_gfv; -foreach my $reversion (@$reversions) { - unless ($noop) { - print $fh "$reversion->{namespace}.$reversion->{id}\n"; - # update item in rights_current with the old attr/reason - $grin_gfv->write_reversion_from_gfv($reversion); - $tracker->inc(); - } -} -close($fh); - -unless ($noop) { - # Send the second email and we're done - $mailer = new_mailer("Old pdus/gfv volumes no longer VIEW_FULL"); - print $mailer $grin_gfv->reversions_from_gfv_report($reversions); - $mailer->close() or warn("Couldn't send message: $!"); -} - -$tracker->finalize() unless $noop; - -# The 2 emails sent only differ in subject and body, -# so we can do everything else using the same template. -sub new_mailer { - my $subject = shift; - my $mailer = new Mail::Mailer; - my $to_addr = join( - ', ', - split(' ', $ENV{'TO_ADDRESSES'}) - ); - - my $email = { - 'From' => $ENV{'FROM_ADDRESS'}, - 'Subject' => $subject, - 'To' => $to_addr - }; - - $mailer->open($email); -} From 928679d9d8144b28974e30329c4d3916614a023f Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Mon, 25 Aug 2025 15:29:19 -0400 Subject: [PATCH 6/8] Add newline at end of file --- perl_lib/grin_gfv.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perl_lib/grin_gfv.pm b/perl_lib/grin_gfv.pm index fe2f4f1..9a4d302 100644 --- a/perl_lib/grin_gfv.pm +++ b/perl_lib/grin_gfv.pm @@ -219,4 +219,4 @@ sub _rights_log_data { return $rights_log_data; } -1; \ No newline at end of file +1; From 9b2b05f3715c9859cf6a0b01d12348c65adece49 Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Thu, 28 Aug 2025 15:01:18 -0400 Subject: [PATCH 7/8] Update `write_reversion_from_gfv` test to use realistic revert-to rights ic/bib --- t/grin_gfv.t | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/t/grin_gfv.t b/t/grin_gfv.t index 9fed184..9f0e489 100644 --- a/t/grin_gfv.t +++ b/t/grin_gfv.t @@ -196,20 +196,21 @@ subtest 'reversions_from_gfv_report' => sub { subtest 'write_reversion_from_gfv' => sub { my $test_namespace = 'gfvtest'; my $test_id = 'write_reversion'; - # Insert pdus/gfv row for this id which we will then revert to icus/gatt. + my ($revert_to_attr, $revert_to_reason) = (2, 1); # ic/bib + # Insert pdus/gfv row for this id which we will then revert to ic/bib. $rights_current_sth->execute($test_namespace, $test_id, $grin_gfv::GFV_ATTR_ID, $grin_gfv::GFV_REASON_ID); my $grin_gfv = grin_gfv->new; my $reversion = { - attr => 19, - reason => 17, + attr => $revert_to_attr, + reason => $revert_to_reason, namespace => $test_namespace, id => $test_id }; $grin_gfv->write_reversion_from_gfv($reversion); my $sql = 'SELECT attr, reason FROM rights_current WHERE namespace = ? AND id = ?'; my ($attr, $reason) = $dbh->selectrow_array($sql, undef, $test_namespace, $test_id); - is($attr, 19); - is($reason, 17); + is($attr, $revert_to_attr); + is($reason, $revert_to_reason); }; done_testing; From 8739cb1ddf568b7914e579df37ce584452701daf Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Thu, 28 Aug 2025 15:16:16 -0400 Subject: [PATCH 8/8] Back out experimental version of db-image providing feed_* tables --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6d2a85a..bb1c311 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,7 +64,7 @@ services: command: prove mariadb: - image: ghcr.io/hathitrust/db-image:updates_aug_2025 + image: ghcr.io/hathitrust/db-image:latest volumes: - ./sql/ingest.sql:/docker-entrypoint-initdb.d/999-ingest.sql - ./sql/hathifiles.sql:/docker-entrypoint-initdb.d/999-hathifiles.sql