Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ GEM
puma (7.0.4)
nio4r (~> 2.0)
racc (1.8.1)
rack (3.2.2)
rack (3.2.3)
rack-session (2.1.1)
base64 (>= 0.1.0)
rack (>= 3.0.0)
Expand Down
15 changes: 12 additions & 3 deletions app/lib/otis/log_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,24 @@ def translate_remote_user(remote_user)
end
end

# Translates a "seq" log field into a count of comma-delimited seq numbers.
# Returns a nonzero page count or nil
def extract_pages_from_seq(seq)
return nil if seq.nil?

count = seq.split(",").count
count.zero? ? nil : count
end

private

# Returns true iff all of these are true:
# role is "ssdproxy"
# role is "ssdproxy" or "resource_sharing"
# access is "success"
# mode is "download"
# datetime is on or after last import
def relevant_log_entry?(entry)
entry["role"] == "ssdproxy" &&
(entry["role"] == "ssdproxy" || entry["role"] == "resource_sharing") &&
entry["access"] == "success" &&
entry["mode"] == "download" &&
Time.parse(entry["datetime"]) >= last_import
Expand All @@ -145,7 +154,7 @@ def create_report(entry)
email: translate_remote_user(entry["remote_user_processed"]),
inst_code: entry["inst_code"],
role: entry["role"],
pages: entry["seq"].split(",").count
pages: extract_pages_from_seq(entry["seq"])
)
# Call `.save` and not `.save!` on this object because its SHA may
# violate uniqueness, which is perfectly okay.
Expand Down
5 changes: 2 additions & 3 deletions app/models/ht_contact.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

# Map of institution email contact and type
class HTContact < ApplicationRecord
self.table_name = "otis_contacts"
self.table_name = "ht_repository.otis_contacts"
self.primary_key = "id"

belongs_to :ht_institution, foreign_key: :inst_id, primary_key: :inst_id, required: true
belongs_to :ht_contact_type, foreign_key: :contact_type, primary_key: :id, required: true
Expand All @@ -13,6 +14,4 @@ class HTContact < ApplicationRecord
validates :inst_id, presence: true
validates :contact_type, presence: true
validates :email, presence: true, format: {with: URI::MailTo::EMAIL_REGEXP}

self.primary_key = "id"
end
5 changes: 2 additions & 3 deletions app/models/ht_contact_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

# Name and description for type of institution contact
class HTContactType < ApplicationRecord
self.table_name = "otis_contact_types"
self.table_name = "ht_repository.otis_contact_types"
self.primary_key = "id"

validates :name, presence: true, uniqueness: true, allow_blank: false
validates :description, presence: true, allow_blank: false
Expand All @@ -11,8 +12,6 @@ class HTContactType < ApplicationRecord

before_destroy :check_contacts, prepend: true

self.primary_key = "id"

private

def check_contacts
Expand Down
7 changes: 5 additions & 2 deletions app/models/ht_download.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# frozen_string_literal: true

# Only used read-only in Otis for reporting
# Read-only in the web interface.
# Written by log_import rake task.
class HTDownload < ApplicationRecord
self.table_name = "otis_downloads"
self.table_name = "ht_repository.otis_downloads"
self.primary_key = "id"

# default_scope { order(:datetime) }
has_one :ht_hathifile, foreign_key: :htid, primary_key: :htid
has_one :ht_institution, foreign_key: :inst_id, primary_key: :inst_code
Expand Down
3 changes: 2 additions & 1 deletion spec/fixtures/ulib-logs/README.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
For use by spec/lib/otis/log_importer_spec.rb

access-imgsrv_downloads.log-20250901 - all well-formed JSON, one qualifying ssdproxy entry
access-imgsrv_downloads.log-20250902.gz - first line ill-formed JSON, one qualifying ssdproxy entry
access-imgsrv_downloads.log-20250902.gz - first line ill-formed JSON, one qualifying ssdproxy
entry and one qualifying resource_sharing entry
Binary file not shown.
40 changes: 30 additions & 10 deletions spec/lib/otis/log_importer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
}
let(:text_log) { Rails.root.join(*fixtures_dir, "access-imgsrv_downloads.log-20250901") }
let(:gzip_log) { Rails.root.join(*fixtures_dir, "access-imgsrv_downloads.log-20250902.gz") }
let(:expected_ssdproxy_delta) { 2 }
let(:expected_resource_sharing_delta) { 1 }
let(:expected_delta) { expected_ssdproxy_delta + expected_resource_sharing_delta }

around(:each) do |example|
HTDownload.delete_all
Expand All @@ -20,10 +23,10 @@
end

describe "#run" do
it "populates the table with two entries" do
it "populates the table with the expected number of entries" do
expect do
importer.run
end.to change { HTDownload.count }.by(2)
end.to change { HTDownload.count }.by(expected_delta)
end

it "creates full-populated records" do
Expand All @@ -46,26 +49,29 @@
it "records page count for partial records" do
importer.run

# Both the qualifying records in the fixtures are partial downloads with
# Both the qualifying ssdproxy records in the fixtures are partial downloads with
# 42 pages
download = HTDownload.first
download = HTDownload.where(role: "ssdproxy").first
expect(download.partial?).to be true
expect(download.pages).to eq 42
end

it "records role" do
importer.run

# Both the qualifying records in the fixtures are from ssdproxy
download = HTDownload.first
# Both the qualifying records with is_partial=1 in the fixtures are from ssdproxy
download = HTDownload.where(is_partial: 1).first
expect(download.role).to eq "ssdproxy"
# The resource sharing example is not partial
download = HTDownload.where(is_partial: 0).first
expect(download.role).to eq "resource_sharing"
end

it "records useful stats" do
importer.run
expect(importer.stats[:files_scanned]).to eq(2)
expect(importer.stats[:entries_found]).to eq(2)
expect(importer.stats[:entries_added]).to eq(2)
expect(importer.stats[:entries_found]).to eq(expected_delta)
expect(importer.stats[:entries_added]).to eq(expected_delta)
end

it "ignores entries earlier than the last run file" do
Expand Down Expand Up @@ -135,11 +141,11 @@
end
end

context "with a gzipped log file having one relevant entry and one malformed entry" do
context "with a gzipped log file having two relevant entries and one malformed entry" do
it "adds one entry to the database" do
expect do
importer.process_file(source_file: gzip_log, log_file: gzip_log)
end.to change { HTDownload.count }.by(1)
end.to change { HTDownload.count }.by(2)
end
end

Expand Down Expand Up @@ -167,4 +173,18 @@
end
end
end

describe "#extract_pages_from_seq" do
it "returns the count when there are seq numbers" do
expect(importer.extract_pages_from_seq("1,2,3")).to eq(3)
end

it "returns `nil` when there are no seq numbers" do
expect(importer.extract_pages_from_seq("")).to be_nil
end

it "returns `nil` when `seq` is `nil`" do
expect(importer.extract_pages_from_seq("")).to be_nil
end
end
end
1 change: 1 addition & 0 deletions spec/models/ht_download_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
end

describe "#pages" do
# Note: this is testing the factory
it "has positive pages when partial" do
build(:ht_download, is_partial: 1) do |download|
expect(download.pages).to be >= 1
Expand Down
2 changes: 1 addition & 1 deletion test/factories.rb
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
datetime { datetime }
htid { Faker::Lorem.unique.characters(number: 10) }
is_partial { [false, true].sample }
pages { is_partial ? rand(100) : nil }
pages { is_partial ? rand(1..100) : nil }
email { Faker::Internet.email }
inst_code { Faker::Internet.unique.domain_word }
role { %w[resource_sharing ssdproxy].sample }
Expand Down