diff --git a/index.php b/index.php index 9e80f945..2f699390 100755 --- a/index.php +++ b/index.php @@ -341,6 +341,11 @@ function handlePEARError($error, $method = null) { $interface->assign('error', $error); $interface->assign('module', $module); header('HTTP/1.1 404 Not Found'); + // If the module was Bib API ("api") but the rewrite rules could not parse the URL + // then we could provide a developer error string in JSON form. For now, bail out. + if ($module == 'api') { + exit(); + } $interface->setTemplate('error.tpl'); $interface->display('layout.tpl'); diff --git a/playwright/test/slow/api.spec.js b/playwright/test/slow/api.spec.js index ab53149d..9605bba2 100644 --- a/playwright/test/slow/api.spec.js +++ b/playwright/test/slow/api.spec.js @@ -1,14 +1,188 @@ const { test, expect } = require('@playwright/test'); const test_cid = '002312286'; // "Kōkogaku zasshi" arbitrarily chosen +const test_cid2 = '100673017'; // "Tests of a portable wood chipper..." also arbitrarily chosen const test_truncated_cid = '2312286'; // Truncated version +const test_htid = 'mdp.39015048895836'; // One of the htids on test_cid +const test_htid2 = 'umn.31951d03005375z'; // Another htid, this one corresponding to test_cid2 + test('XML with full CID', async ({ page }) => { const response = await page.goto(`/Record/${test_cid}.xml`); - await expect(response.ok()).toBeTruthy(); + expect(response.ok()).toBeTruthy(); + expect(response.headers()["content-type"]).toContain('text/xml'); }); test('XML with truncated CID', async ({ page }) => { const response = await page.goto(`/Record/${test_truncated_cid}.xml`); - await expect(response.ok()).toBeTruthy(); -}); \ No newline at end of file + expect(response.ok()).toBeTruthy(); + expect(response.headers()["content-type"]).toContain('text/xml'); +}); + +test('XML with HTID', async ({ page }) => { + const response = await page.goto(`/MARCXML/${test_htid}`); + expect(response.status()).toBe(200); + 
expect(response.headers()["content-type"]).toContain('text/xml'); +}); + +// See https://github.com/hathitrust/catalog/wiki/Volume-API +// for an explanation of these inscrutable "b/qf/qv.t", "b/t/Q" codes. +// API JSON single-record responses are of the form +// {records: {cid: {...}}, items: [...]} +// =========== single-id query, b/qf/qv.t endpoint + +test('Bib API b/qf/qv.t brief recordnumber', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/recordnumber/${test_cid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // No marc-xml property + expect(body.records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API b/qf/qv.t full recordnumber', async ({ page }) => { + const response = await page.goto(`/api/volumes/full/recordnumber/${test_cid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // Has marc-xml property + expect(body.records[test_cid]).toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API b/qf/qv.t brief htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/htid/${test_htid}.json`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // No marc-xml property + expect(body.records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +test('Bib API b/qf/qv.t full htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/full/htid/${test_htid}.json`); + 
expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body.records).toHaveProperty(test_cid); + // Has marc-xml property + expect(body.records[test_cid]).toHaveProperty("marc-xml"); + expect(body.items.length).toBeGreaterThan(0); +}); + +// =========== single-id query, b/t/Q endpoint +test('Bib API b/t/Q brief 1-htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/htid:${test_htid}`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); + // Has no marc-xml property + expect(body[`htid:${test_htid}`].records[test_cid]).not.toHaveProperty("marc-xml"); + expect(Object.keys(body)).toHaveLength(1); +}); + +// =========== multi-id query, b/t/Q endpoint +test('Bib API b/t/Q brief 2-htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/htid:${test_htid}|htid:${test_htid2}`); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); + expect(body[`htid:${test_htid2}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid2}`]).toHaveProperty('items'); + // Has no marc-xml property + expect(body[`htid:${test_htid}`].records[test_cid]).not.toHaveProperty("marc-xml"); + expect(body[`htid:${test_htid2}`].records[test_cid2]).not.toHaveProperty("marc-xml"); + expect(Object.keys(body)).toHaveLength(2); +}); + +// Inconsistencies and possible misfeatures +// b/t/Q allows any value for brevity and defaults to "brief" +// This is inconsistent with b/qf/qv.t which 
requires b to be in {brief,full} +test('Bib API b/t/Q b? 200', async ({ page }) => { + const response = await page.goto(`/api/volumes/blah/json/htid:${test_htid}`); + expect(response.status()).toBe(200); +}); + +// Error conditions (rewrite side) +// These will be caught by the rewrite rules and return 404 +// These tests time out with firefox but succeed with all others +test('Bib API b/qf/qv.t b? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/blah/htid/${test_htid}.json`); + expect(response.status()).toBe(404); +}); + +test('Bib API b/qf/qv.t t? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/brief/htid/${test_htid}.blah`); + expect(response.status()).toBe(404); +}); + +test('Bib API b/t/Q t? 404', async ({ page, browserName }) => { + test.skip(browserName === 'firefox', 'times out with firefox for unknown reason'); + const response = await page.goto(`/api/volumes/brief/blah/htid:${test_htid}`); + expect(response.status()).toBe(404); +}); + +// Error conditions (volumes.php side) +// These will be caught by volumes.php and return 400 +test('Bib API b/qf/qv.t qf? 400', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/blah/${test_htid}.json`); + expect(response.status()).toBe(400); +}); + +test('Bib API b/t/Q qf? 400', async ({ page }) => { + const response = await page.goto(`/api/volumes/brief/json/blah:${test_htid}`); + expect(response.status()).toBe(400); +}); + +// Empty results +test('Bib API b/qf/qv.t qv? 
200', async ({ page }) => { + const response = await page.goto('/api/volumes/brief/htid/blah.json'); + const body = await response.json(); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + // Single-record results have a `records` hash because volumes.php + // returns JSON crafted to correctly represent it as `{}`. + // Multi-record results do not have the opportunity to do so, so they show up with + // the `records` as `[]`, see below. + expect(body.records).toStrictEqual({}); + expect(body.items).toStrictEqual([]); +}); + +test('Bib API b/t/Q qv? 200', async ({ page }) => { + const response = await page.goto('/api/volumes/brief/json/htid:blah'); + const body = await response.json(); + expect(response.status()).toBe(200); + expect(response.headers()["content-type"]).toContain('application/json'); + // Multi-records are keyed from outside, if you will, so the records are in an array + // instead of a hash (compare and contrast the preceding example). + // This may be considered buggy behavior. 
+ expect(body['htid:blah'].records).toStrictEqual([]); + expect(body['htid:blah'].items).toStrictEqual([]); +}); + +// Endpoint variants with implied brevity "brief" +test('Bib API t/Q htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/json/htid:${test_htid}`); + expect(response.status()).toBe(200); + const body = await response.json(); + expect(body[`htid:${test_htid}`]).toHaveProperty('records'); + expect(body[`htid:${test_htid}`]).toHaveProperty('items'); +}); + +test('Bib API qf/qv.t htid', async ({ page }) => { + const response = await page.goto(`/api/volumes/htid/${test_htid}.json`); + expect(response.status()).toBe(200); + const body = await response.json(); + expect(body.items.length).toBeGreaterThan(0); +}); diff --git a/static/api/volumes.php b/static/api/volumes.php index defaf685..56dd407d 100644 --- a/static/api/volumes.php +++ b/static/api/volumes.php @@ -5,9 +5,13 @@ // Bail immediately if there's no query // print_r($_REQUEST); + +// NOTE: this case may be unreachable, since the current rewrite rules may return a 404 first. if (!isset($_REQUEST['q']) || !preg_match('/\S/', $_REQUEST['q'])) { - header("HTTP/1.0 400 Malformed"); - exit(); + header("HTTP/1.0 400 Malformed"); + header('Content-type: application/json; charset=UTF-8'); + echo json_encode(['message' => 'missing or empty query']); + exit(); } require_once 'PEAR.php'; @@ -81,7 +85,14 @@ 'rows' => 200 ); -if ($_REQUEST['brevity'] == 'full') { +// Set brevity to the default value of "brief" as brevity is not always +// provided by the rewrite rules in .htaccess +$brevity = 'brief'; +if (isset($_REQUEST['brevity']) && $_REQUEST['brevity'] == 'full') { + $brevity = 'full'; +} + +if ($brevity == 'full') { $commonargs['fl'] = $commonargs['fl'] . 
',fullrecord'; } @@ -104,6 +115,7 @@ class QObj { public $string; + public $brevity; private $_id; public $tspecs = array(); # Transformed specs public $qspecs = array(); # Query specs for solr @@ -111,11 +123,12 @@ class QObj - function __construct($str) { + function __construct($str, $brevity) { global $validField; global $fieldmap; $this->string = $str; + $this->brevity = $brevity; $specs = explode(';', $str); foreach ($specs as $spec) { @@ -127,7 +140,7 @@ function __construct($str) { if ($field == 'id') { $this->_id = $fv[1]; - continue; + continue; } if (!isset($fv[1])) { continue; @@ -266,7 +279,7 @@ function recordsStructure($docs) { $rinfo[$index . 's'] = array(); } } - if ($_REQUEST['brevity'] == 'full') { + if ($this->brevity == 'full') { $rinfo['marc-xml'] = $doc['fullrecord']; } $records[$docid] = $rinfo; @@ -294,7 +307,7 @@ function itemsStructure($docs) { $iinfo['itemURL'] = "https://babel.hathitrust.org/cgi/pt?id=" . $htid; $rc = isset($ht['rights']) ? $ht['rights'] : 'ic'; - $rc = is_array($rc) ? $rc[0] : $rc; + $rc = is_array($rc) ? $rc[0] : $rc; $iinfo['rightsCode'] = $rc; $iinfo['lastUpdate'] = $ht['ingest']; $iinfo['enumcron'] = (isset($ht['enumcron']) && preg_match('/\S/', $ht['enumcron']))? 
$ht['enumcron'] : false; @@ -317,7 +330,7 @@ function itemsStructure($docs) { foreach ($qstrings as $qstring) { - $nqo = new QObj($qstring); + $nqo = new QObj($qstring, $brevity); $solrQueryComponents = array_merge($solrQueryComponents, $nqo->qspecs); $qobjs[$qstring] = $nqo; } @@ -331,7 +344,8 @@ function itemsStructure($docs) { if (!preg_match('/\S/', $q)) { header('HTTP/1.1 400 Bad Request'); $origQuery = htmlspecialchars($origQuery); - echo "Query '$origQuery' is invalid"; + header('Content-type: application/json; charset=UTF-8'); + echo json_encode(['message' => "query '$origQuery' is invalid"]); exit(); } @@ -387,12 +401,16 @@ function itemsStructure($docs) { if ($_REQUEST['type'] == 'json') { if (isset($allmatches['records']) && count($allmatches['records']) == 0) { + header('Content-type: application/json; charset=UTF-8'); + // This is a hack to get the correct JSON representation of the empty `records` hash. + // Empty records can still show up serialized as "[]" in multi-record results, + // but this takes care of the simplest case. echo "{\n \"records\": {}, \"items\": []\n}"; exit; } else { $json = json_encode($allmatches); if (isset($_REQUEST['callback'])) { - header('Content-type: application/javascript; charset=UTF-8'); + header('Content-type: application/javascript; charset=UTF-8'); echo $_REQUEST['callback'] . "( $json)"; } else { header('Content-type: application/json; charset=UTF-8');