From f6e4f8057f68afd8d9224cafc0ed6818ecf5553a Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Wed, 17 Dec 2025 13:01:50 -0500 Subject: [PATCH 1/9] ETT-1140 catalog needs a default error page -- Bib API side - Right now this just has some tests for successful queries against `htid` and `recordnumber`. - TODO: handle any errors that make it thru the rewrite rules, in `static/api/volumes.php` - TODO: provide an `api/error.tpl` file for API queries that don't conform to a structure the rewrite rules can handle. --- playwright/test/slow/api.spec.js | 49 +++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index ab53149d..ed359691 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -11,4 +11,51 @@ test('XML with full CID', async ({ page }) => { test('XML with truncated CID', async ({ page }) => { const response = await page.goto(`/Record/${test_truncated_cid}.xml`); await expect(response.ok()).toBeTruthy(); -}); \ No newline at end of file +}); + +// API JSON resonses are of the form +// {records: {cid: {...}, items: [...]} + +test('Bib API catalog record brief', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/recordnumber/${test_cid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // No marc-xml property + expect(body.records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API catalog record full', async ({ page }) => { + const response = await page.goto(`/api/volumes/full/recordnumber/${test_cid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // Has marc-xml property + expect(body.records[test_cid]).toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API HTID brief', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/htid/${test_htid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // No marc-xml property + expect(body.records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API HTID full', async ({ page }) => { + const response = await page.goto(`/api/volumes/full/htid/${test_htid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // Has marc-xml property + expect(body.records[test_cid]).toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); From 330c8bb3e102fdae63463ad4bc826952d4fd2d3e Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Wed, 17 Dec 2025 13:26:19 -0500 Subject: [PATCH 2/9] Restore test_htid constant dropped when moving to new branch --- playwright/test/slow/api.spec.js | 1 + 1 file changed, 1 insertion(+) diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index ed359691..2fc9fa77 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -2,6 +2,7 @@ const { test, expect } = require('@playwright/test'); const test_cid = '002312286'; // "Kōkogaku zasshi" arbitrarily chosen const test_truncated_cid = '2312286'; // Truncated version +const test_htid = 'mdp.39015048895836'; // One of the htids on test_cid test('XML with full CID', async ({ page }) => { const response = await page.goto(`/Record/${test_cid}.xml`); From d54eebe9a1a54114e568d5d6c4887836bf9f4197 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Fri, 19 Dec 2025 16:26:38 -0500 Subject: [PATCH 3/9] - Since two of the Bib API enpoints in .htaccess don't provide `brevity` params, make sure it is set in `volumes.php` if not set. - Pass `brevity` parameter to `QObj` initializer so `$brevity` does not have to be flagged as a global variable. --- static/api/volumes.php | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/static/api/volumes.php b/static/api/volumes.php index defaf685..db039f43 100644 --- a/static/api/volumes.php +++ b/static/api/volumes.php @@ -81,7 +81,14 @@ 'rows' => 200 ); -if ($_REQUEST['brevity'] == 'full') { +// Set brevity to the default value of "brief" as brevity is not always +// provided by the rewrite rules in .htaccess +$brevity = 'brief'; +if (isset($_REQUEST['brevity']) && $_REQUEST['brevity'] == 'full') { + $brevity = 'full'; +} + +if ($brevity == 'full') { $commonargs['fl'] = $commonargs['fl'] . ',fullrecord'; } @@ -104,6 +111,7 @@ class QObj { public $string; + public $brevity; private $_id; public $tspecs = array(); # Transformed specs public $qspecs = array(); # Query specs for solr @@ -111,11 +119,12 @@ class QObj - function __construct($str) { + function __construct($str, $brevity) { global $validField; global $fieldmap; $this->string = $str; + $this->brevity = $brevity; $specs = explode(';', $str); foreach ($specs as $spec) { @@ -266,7 +275,7 @@ function recordsStructure($docs) { $rinfo[$index . 's'] = array(); } } - if ($_REQUEST['brevity'] == 'full') { + if ($this->brevity == 'full') { $rinfo['marc-xml'] = $doc['fullrecord']; } $records[$docid] = $rinfo; @@ -317,7 +326,7 @@ function itemsStructure($docs) { foreach ($qstrings as $qstring) { - $nqo = new QObj($qstring); + $nqo = new QObj($qstring, $brevity); $solrQueryComponents = array_merge($solrQueryComponents, $nqo->qspecs); $qobjs[$qstring] = $nqo; } From db549815942966c54c1e1eaf62c7eb42f0ee8150 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Fri, 19 Dec 2025 16:29:28 -0500 Subject: [PATCH 4/9] - Add playwright tests for all Bib API endpoints, some common errors, and htid-to-MARCXML facility. --- playwright/test/slow/api.spec.js | 125 +++++++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 7 deletions(-) diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index 2fc9fa77..b9d73837 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -1,23 +1,37 @@ const { test, expect } = require('@playwright/test'); const test_cid = '002312286'; // "Kōkogaku zasshi" arbitrarily chosen +const test_cid2 = '100673017'; // "Kōkogaku zasshi" arbitrarily chosen const test_truncated_cid = '2312286'; // Truncated version const test_htid = 'mdp.39015048895836'; // One of the htids on test_cid +const test_htid2 = 'umn.31951d03005375z'; // Another htid, this one corresponding test_cid2 + test('XML with full CID', async ({ page }) => { const response = await page.goto(`/Record/${test_cid}.xml`); - await expect(response.ok()).toBeTruthy(); + expect(response.ok()).toBeTruthy(); + expect(response.headers()["content-type"]).toContain('text/xml'); }); test('XML with truncated CID', async ({ page }) => { const response = await page.goto(`/Record/${test_truncated_cid}.xml`); - await expect(response.ok()).toBeTruthy(); + expect(response.ok()).toBeTruthy(); + expect(response.headers()["content-type"]).toContain('text/xml'); +}); + +test('XML with HTID', async ({ page }) => { + const response = await page.goto(`/MARCXML/${test_htid}`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('text/xml'); }); -// API JSON resonses are of the form +// See https://github.com/hathitrust/catalog/wiki/Volume-API +// for an explanation of these inscrutable "b/qf/qv.t", "b/t/Q" codes. +// API JSON single-record responses are of the form // {records: {cid: {...}, items: [...]} +// =========== single-id query, b/qf/qv.t endpoint -test('Bib API catalog record brief', async ({ page }) => { +test('Bib API b/qf/qv.t brief recordnumber', async ({ page }) => { const response = await page.goto(`/api/volumes/brief/recordnumber/${test_cid}.json`); expect(response.status()).toBe(200); expect(response.headers()["content-type"]).toContain('application/json'); @@ -28,7 +42,7 @@ test('Bib API catalog record brief', async ({ page }) => { expect(body.items.length).toBeGreaterThan(0); }); -test('Bib API catalog record full', async ({ page }) => { +test('Bib API b/qf/qv.t full recordnumber', async ({ page }) => { const response = await page.goto(`/api/volumes/full/recordnumber/${test_cid}.json`); expect(response.status()).toBe(200); expect(response.headers()["content-type"]).toContain('application/json'); @@ -39,7 +53,7 @@ test('Bib API catalog record full', async ({ page }) => { expect(body.items.length).toBeGreaterThan(0); }); -test('Bib API HTID brief', async ({ page }) => { +test('Bib API b/qf/qv.t brief htid', async ({ page }) => { const response = await page.goto(`/api/volumes/brief/htid/${test_htid}.json`); expect(response.status()).toBe(200); expect(response.headers()["content-type"]).toContain('application/json'); @@ -50,7 +64,7 @@ test('Bib API HTID brief', async ({ page }) => { expect(body.items.length).toBeGreaterThan(0); }); -test('Bib API HTID full', async ({ page }) => { +test('Bib API b/qf/qv.t full htid', async ({ page }) => { const response = await page.goto(`/api/volumes/full/htid/${test_htid}.json`); expect(response.status()).toBe(200); expect(response.headers()["content-type"]).toContain('application/json'); @@ -60,3 +74,100 @@ test('Bib API HTID full', async ({ page }) => { expect(body.records[test_cid]).toHaveProperty("marc-xml"); expect(body.items.length).toBeGreaterThan(0); }); + +// =========== single-id query, b/t/Q endpoint +test('Bib API b/t/Q brief 1-htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/htid:${test_htid}`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); + // Has no marc-xml property + expect(body[`htid:${test_htid}`].records[test_cid]).not.toHaveProperty("marc-xml"); + expect(Object.keys(body)).toHaveLength(1); +}); + +// =========== multi-id query, b/t/Q endpoint +test('Bib API b/t/Q brief 2-htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/htid:${test_htid}|htid:${test_htid2}`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); + expect(body[`htid:${test_htid2}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid2}`]).toHaveProperty('items'); + // Has no marc-xml property + expect(body[`htid:${test_htid}`].records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body[`htid:${test_htid2}`].records[test_cid2]).not.toHaveProperty("marc-xml"); + expect(Object.keys(body)).toHaveLength(2); +}); + +// Inconsistencies and possible misfeatures +// b/t/Q allows any value for brevity and defaults to "brief" +// This is inconsistent with b/qf/qv.t which requires b to be in {brief,full} +test('Bib API b/t/Q b? 200', async ({ page }) => { + const response = await page.goto(`/api/volumes/blah/json/htid:${test_htid}`); + expect(response.status()).toBe(200); +}); + +// Error conditions (rewrite side) +// These will be caught by the rewrite rules and return 404 +// These tests time out with firefox but succeed with all others +test('Bib API b/qf/qv.t b? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/blah/htid/${test_htid}.json`); + expect(response.status()).toBe(404); +}); + +test('Bib API b/qf/qv.t t? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/brief/htid/${test_htid}.blah`); + expect(response.status()).toBe(404); +}); + +test('Bib API b/t/Q t? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/brief/blah/htid:${test_htid}`); + expect(response.status()).toBe(404); +}); + +// Error conditions (volumes.php side) +// These will be caught by volumes.php and return 400 +test('Bib API b/qf/qv.t qf? 400', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/blah/${test_htid}.json`); + expect(response.status()).toBe(400); +}); + +test('Bib API b/t/Q qf? 400', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/blah:${test_htid}`); + expect(response.status()).toBe(400); +}); + +// Empty results +test('Bib API b/qf/qv.t qv? 200', async ({ page }) => { + const response = await page.goto('/api/volumes/brief/htid/blah.json'); + expect(response.status()).toBe(200); +}); + +test('Bib API b/t/Q qv? 200', async ({ page }) => { + const response = await page.goto('/api/volumes/brief/json/htid:blah'); + expect(response.status()).toBe(200); +}); + +// Endpoint variants with implied brevity "brief" +test('Bib API t/Q htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/json/htid:${test_htid}`); + expect(response.status()).toBe(200); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); +}); + +test('Bib API qf/qv.t htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/htid/${test_htid}.json`); + expect(response.status()).toBe(200); + const body = await response.json(); + expect(body.items.length).toBeGreaterThan(0); +}); From 36108994882ff2d91ebf9633fe9a52a64398e673 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Fri, 19 Dec 2025 17:09:41 -0500 Subject: [PATCH 5/9] Do not try to load an error template when the problem is a Bib API 404 --- index.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/index.php b/index.php index 9e80f945..2f699390 100755 --- a/index.php +++ b/index.php @@ -341,6 +341,11 @@ function handlePEARError($error, $method = null) { $interface->assign('error', $error); $interface->assign('module', $module); header('HTTP/1.1 404 Not Found'); + // If the module was Bib API ("api") but the rewrite rules could not parse the URL + // then we could provide a developer error string in JSON form. For now, bail out. + if ($module == 'api') { + exit(); + } $interface->setTemplate('error.tpl'); $interface->display('layout.tpl'); From e219341ca749c037ed5e94e29f455256ae56fa37 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Mon, 22 Dec 2025 09:59:21 -0500 Subject: [PATCH 6/9] Fix pasted-in comment on API playwright test --- playwright/test/slow/api.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index b9d73837..bb474fe7 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -1,7 +1,7 @@ const { test, expect } = require('@playwright/test'); const test_cid = '002312286'; // "Kōkogaku zasshi" arbitrarily chosen -const test_cid2 = '100673017'; // "Kōkogaku zasshi" arbitrarily chosen +const test_cid2 = '100673017'; // "Tests of a portable wood chipper..." also arbitrarily chosen const test_truncated_cid = '2312286'; // Truncated version const test_htid = 'mdp.39015048895836'; // One of the htids on test_cid const test_htid2 = 'umn.31951d03005375z'; // Another htid, this one corresponding test_cid2 From ee20d4feac2a406fe09f93040245cd4a435f796c Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Tue, 23 Dec 2025 15:27:23 -0500 Subject: [PATCH 7/9] - Empty response `{records: {}, items: []}` from single-record search was being returned as HTML instead of `application/json`. - Fixed in `volumes.php` and tested in playwright. - Explicitly test emptiness of empty responses. - Add notes about the forms `records` may take in different JSON structures due to the stupidity of PHP. --- playwright/test/slow/api.spec.js | 15 +++++++++++++++ static/api/volumes.php | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index bb474fe7..9605bba2 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -148,12 +148,27 @@ test('Bib API b/t/Q qf? 400', async ({ page }) => { // Empty results test('Bib API b/qf/qv.t qv? 200', async ({ page }) => { const response = await page.goto('/api/volumes/brief/htid/blah.json'); + const body = await response.json(); expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + // Single-record results have a `records` hash because volumes.php + // returns JSON crafted to correctly represent it as `{}`. + // Multi-record results do not have the opportunity to do so, so they show up with + // the `records` as `[]`, see below. + expect(body.records).toStrictEqual({}); + expect(body.items).toStrictEqual([]); }); test('Bib API b/t/Q qv? 200', async ({ page }) => { const response = await page.goto('/api/volumes/brief/json/htid:blah'); + const body = await response.json(); expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + // Multi-records are keyed from outside, if you will, so the records are in an array + // instead of a hash (compare and contrast the preceding example). + // This may be considered buggy behavior. + expect(body['htid:blah'].records).toStrictEqual([]); + expect(body['htid:blah'].items).toStrictEqual([]); }); // Endpoint variants with implied brevity "brief" diff --git a/static/api/volumes.php b/static/api/volumes.php index db039f43..edb638c6 100644 --- a/static/api/volumes.php +++ b/static/api/volumes.php @@ -396,6 +396,10 @@ function itemsStructure($docs) { if ($_REQUEST['type'] == 'json') { if (isset($allmatches['records']) && count($allmatches['records']) == 0) { + header('Content-type: application/json; charset=UTF-8'); + // This is a hack to get the correct JSON representation of the empty `records` hash. + // Empty records can still show up serialized as "[]" in multi-record results. + // but this takes care of the simplest case. echo "{\n \"records\": {}, \"items\": []\n}"; exit; } else { From cbbe13813ea0f21859955f4c60a38e68d8161833 Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Tue, 23 Dec 2025 16:03:38 -0500 Subject: [PATCH 8/9] Return `api/volumes.php` 400 errors as JSON instead of HTML --- static/api/volumes.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/static/api/volumes.php b/static/api/volumes.php index edb638c6..fa769163 100644 --- a/static/api/volumes.php +++ b/static/api/volumes.php @@ -5,8 +5,12 @@ // Bail immediately if there's no query // print_r($_REQUEST); + +// This may not be possible without a 404 from the current rewrite rules. if (!isset($_REQUEST['q']) || !preg_match('/\S/', $_REQUEST['q'])) { header("HTTP/1.0 400 Malformed"); + header('Content-type: application/json; charset=UTF-8'); + echo json_encode(['message' => 'missing or empty query']); exit(); } @@ -340,7 +344,8 @@ function itemsStructure($docs) { if (!preg_match('/\S/', $q)) { header('HTTP/1.1 400 Bad Request'); $origQuery = htmlspecialchars($origQuery); - echo "Query '$origQuery' is invalid"; + header('Content-type: application/json; charset=UTF-8'); + echo json_encode(['message' => "query '$origQuery' is invalid"]); exit(); } @@ -405,7 +410,7 @@ function itemsStructure($docs) { } else { $json = json_encode($allmatches); if (isset($_REQUEST['callback'])) { - header('Content-type: application/javascript; charset=UTF-8'); + header('Content-type: application/javascript; charset=UTF-8'); echo $_REQUEST['callback'] . "( $json)"; } else { header('Content-type: application/json; charset=UTF-8'); From 1104559d93418a1fef4fe64108c715367c4c9e6b Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Tue, 23 Dec 2025 16:04:27 -0500 Subject: [PATCH 9/9] TIDY: de-TAB-ify `static/api/volumes.php` --- static/api/volumes.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/static/api/volumes.php b/static/api/volumes.php index fa769163..56dd407d 100644 --- a/static/api/volumes.php +++ b/static/api/volumes.php @@ -8,10 +8,10 @@ // This may not be possible without a 404 from the current rewrite rules. if (!isset($_REQUEST['q']) || !preg_match('/\S/', $_REQUEST['q'])) { - header("HTTP/1.0 400 Malformed"); - header('Content-type: application/json; charset=UTF-8'); + header("HTTP/1.0 400 Malformed"); + header('Content-type: application/json; charset=UTF-8'); echo json_encode(['message' => 'missing or empty query']); - exit(); + exit(); } require_once 'PEAR.php'; @@ -140,7 +140,7 @@ function __construct($str, $brevity) { if ($field == 'id') { $this->_id = $fv[1]; - continue; + continue; } if (!isset($fv[1])) { continue; @@ -307,7 +307,7 @@ function itemsStructure($docs) { $iinfo['itemURL'] = "https://babel.hathitrust.org/cgi/pt?id=" . $htid; $rc = isset($ht['rights']) ? $ht['rights'] : 'ic'; - $rc = is_array($rc) ? $rc[0] : $rc; + $rc = is_array($rc) ? $rc[0] : $rc; $iinfo['rightsCode'] = $rc; $iinfo['lastUpdate'] = $ht['ingest']; $iinfo['enumcron'] = (isset($ht['enumcron']) && preg_match('/\S/', $ht['enumcron']))? $ht['enumcron'] : false;