Skip to content
Open
5 changes: 5 additions & 0 deletions index.php
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,11 @@ function handlePEARError($error, $method = null) {
$interface->assign('error', $error);
$interface->assign('module', $module);
header('HTTP/1.1 404 Not Found');
// If the module was Bib API ("api") but the rewrite rules could not parse the URL
// then we could provide a developer error string in JSON form. For now, bail out.
if ($module == 'api') {
exit();
}
$interface->setTemplate('error.tpl');
$interface->display('layout.tpl');

Expand Down
180 changes: 177 additions & 3 deletions playwright/test/slow/api.spec.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,188 @@
const { test, expect } = require('@playwright/test');

// Identifiers shared by every test in this file.
const test_cid = '002312286'; // Catalog record ID (CID) for "Kōkogaku zasshi", arbitrarily chosen
const test_cid2 = '100673017'; // CID for "Tests of a portable wood chipper...", also arbitrarily chosen
const test_truncated_cid = '2312286'; // test_cid with its leading zeros removed
const test_htid = 'mdp.39015048895836'; // One of the htids on test_cid
const test_htid2 = 'umn.31951d03005375z'; // Another htid, this one corresponding to test_cid2


// A record requested by its full CID with a `.xml` suffix is served
// successfully with an XML content type.
test('XML with full CID', async ({ page }) => {
  const response = await page.goto(`/Record/${test_cid}.xml`);
  // `toBeTruthy()` is a synchronous matcher: assert once, without `await`.
  expect(response.ok()).toBeTruthy();
  expect(response.headers()['content-type']).toContain('text/xml');
});

// A record requested by a CID without its leading zeros is served
// successfully with an XML content type.
test('XML with truncated CID', async ({ page }) => {
  const response = await page.goto(`/Record/${test_truncated_cid}.xml`);
  // All assertions live inside the single test callback so `response` is in scope.
  expect(response.ok()).toBeTruthy();
  expect(response.headers()['content-type']).toContain('text/xml');
});

// The /MARCXML/<htid> route answers 200 with an XML content type.
test('XML with HTID', async ({ page }) => {
  const marcResponse = await page.goto(`/MARCXML/${test_htid}`);
  const statusCode = marcResponse.status();
  expect(statusCode).toBe(200);
  const { 'content-type': contentType } = marcResponse.headers();
  expect(contentType).toContain('text/xml');
});

// See https://github.com/hathitrust/catalog/wiki/Volume-API
// for an explanation of these inscrutable "b/qf/qv.t", "b/t/Q" codes.
// API JSON single-record responses are of the form
// {records: {cid: {...}}, items: [...]}
// =========== single-id query, b/qf/qv.t endpoint

// One test per [brevity, query field, query value] combination.
// Every combination must return JSON whose `records` contains test_cid and
// whose `items` is non-empty; only "full" brevity carries a `marc-xml` property.
const singleIdCases = [
  ['brief', 'recordnumber', test_cid],
  ['full', 'recordnumber', test_cid],
  ['brief', 'htid', test_htid],
  ['full', 'htid', test_htid],
];

for (const [brevity, field, value] of singleIdCases) {
  test(`Bib API b/qf/qv.t ${brevity} ${field}`, async ({ page }) => {
    const response = await page.goto(`/api/volumes/${brevity}/${field}/${value}.json`);
    expect(response.status()).toBe(200);
    expect(response.headers()['content-type']).toContain('application/json');

    const payload = await response.json();
    expect(payload.records).toHaveProperty(test_cid);

    const record = payload.records[test_cid];
    if (brevity === 'full') {
      // Has marc-xml property
      expect(record).toHaveProperty('marc-xml');
    } else {
      // No marc-xml property
      expect(record).not.toHaveProperty('marc-xml');
    }
    expect(payload.items.length).toBeGreaterThan(0);
  });
}

// =========== single-id query, b/t/Q endpoint
// A b/t/Q query for one htid returns an object with exactly one key (the
// query string itself) whose value holds `records` and `items`.
test('Bib API b/t/Q brief 1-htid', async ({ page }) => {
  const queryKey = `htid:${test_htid}`;
  const response = await page.goto(`/api/volumes/brief/json/${queryKey}`);
  expect(response.status()).toBe(200);
  expect(response.headers()['content-type']).toContain('application/json');

  const payload = await response.json();
  for (const section of ['records', 'items']) {
    expect(payload[queryKey]).toHaveProperty(section);
  }
  // Has no marc-xml property
  expect(payload[queryKey].records[test_cid]).not.toHaveProperty('marc-xml');
  expect(Object.keys(payload)).toHaveLength(1);
});

// =========== multi-id query, b/t/Q endpoint
// Two `|`-separated htid queries yield one top-level key per query, each
// holding its own `records` and `items`.
test('Bib API b/t/Q brief 2-htid', async ({ page }) => {
  const firstKey = `htid:${test_htid}`;
  const secondKey = `htid:${test_htid2}`;
  const response = await page.goto(`/api/volumes/brief/json/${firstKey}|${secondKey}`);
  expect(response.status()).toBe(200);
  expect(response.headers()['content-type']).toContain('application/json');

  const payload = await response.json();
  for (const key of [firstKey, secondKey]) {
    expect(payload[key]).toHaveProperty('records');
    expect(payload[key]).toHaveProperty('items');
  }
  // Has no marc-xml property
  expect(payload[firstKey].records[test_cid]).not.toHaveProperty('marc-xml');
  expect(payload[secondKey].records[test_cid2]).not.toHaveProperty('marc-xml');
  expect(Object.keys(payload)).toHaveLength(2);
});

// Inconsistencies and possible misfeatures
// b/t/Q allows any value for brevity and defaults to "brief"
// This is inconsistent with b/qf/qv.t which requires b to be in {brief,full}
// An unrecognized brevity segment ("blah") is still answered with 200 on b/t/Q.
test('Bib API b/t/Q b? 200', async ({ page }) => {
  const unknownBrevityUrl = `/api/volumes/blah/json/htid:${test_htid}`;
  const response = await page.goto(unknownBrevityUrl);
  const statusCode = response.status();
  expect(statusCode).toBe(200);
});

// Error conditions (rewrite side)
// These will be caught by the rewrite rules and return 404.
// They are fetched with Playwright's `request` fixture (a plain HTTP client)
// instead of `page.goto`: the browser navigation timed out under Firefox,
// which previously forced these tests to be skipped for that browser.
const rewrite404Cases = [
  // Unknown brevity on b/qf/qv.t
  ['Bib API b/qf/qv.t b? 404', `/api/volumes/blah/htid/${test_htid}.json`],
  // Unknown type suffix on b/qf/qv.t
  ['Bib API b/qf/qv.t t? 404', `/api/volumes/brief/htid/${test_htid}.blah`],
  // Unknown type segment on b/t/Q
  ['Bib API b/t/Q t? 404', `/api/volumes/brief/blah/htid:${test_htid}`],
];

for (const [title, url] of rewrite404Cases) {
  test(title, async ({ request }) => {
    const response = await request.get(url);
    expect(response.status()).toBe(404);
  });
}

// Error conditions (volumes.php side)
// These will be caught by volumes.php and return 400
// An unknown query field ("blah") is rejected with 400 on the b/qf/qv.t form.
test('Bib API b/qf/qv.t qf? 400', async ({ page }) => {
  const badFieldUrl = `/api/volumes/brief/blah/${test_htid}.json`;
  const response = await page.goto(badFieldUrl);
  const statusCode = response.status();
  expect(statusCode).toBe(400);
});

// An unknown query field ("blah") is rejected with 400 on the b/t/Q form.
test('Bib API b/t/Q qf? 400', async ({ page }) => {
  const badFieldUrl = `/api/volumes/brief/json/blah:${test_htid}`;
  const response = await page.goto(badFieldUrl);
  const statusCode = response.status();
  expect(statusCode).toBe(400);
});

// Empty results
// A well-formed single-id query that matches nothing still answers 200 with
// an empty `records` object and an empty `items` array.
test('Bib API b/qf/qv.t qv? 200', async ({ page }) => {
  const response = await page.goto('/api/volumes/brief/htid/blah.json');
  // Check status and content type before parsing, so an unexpected error page
  // is reported as a status/content-type mismatch rather than a JSON parse error.
  expect(response.status()).toBe(200);
  expect(response.headers()['content-type']).toContain('application/json');
  const body = await response.json();
  // Single-record results have a `records` hash because volumes.php
  // returns JSON crafted to correctly represent it as `{}`.
  // Multi-record results do not have the opportunity to do so, so they show up with
  // the `records` as `[]`.
  expect(body.records).toStrictEqual({});
  expect(body.items).toStrictEqual([]);
});

// A well-formed b/t/Q query that matches nothing still answers 200, with empty
// `records` and `items` under the query key.
test('Bib API b/t/Q qv? 200', async ({ page }) => {
  const response = await page.goto('/api/volumes/brief/json/htid:blah');
  // Check status and content type before parsing, so an unexpected error page
  // is reported as a status/content-type mismatch rather than a JSON parse error.
  expect(response.status()).toBe(200);
  expect(response.headers()['content-type']).toContain('application/json');
  const body = await response.json();
  // Multi-records are keyed from outside, if you will, so the records are in an array
  // instead of a hash (unlike the single-record form, which yields `{}`).
  // This may be considered buggy behavior.
  expect(body['htid:blah'].records).toStrictEqual([]);
  expect(body['htid:blah'].items).toStrictEqual([]);
});

// Endpoint variants with implied brevity "brief"
// The t/Q form without a brevity segment answers 200 and keys the result by query string.
test('Bib API t/Q htid', async ({ page }) => {
  const queryKey = `htid:${test_htid}`;
  const response = await page.goto(`/api/volumes/json/${queryKey}`);
  expect(response.status()).toBe(200);

  const payload = await response.json();
  for (const section of ['records', 'items']) {
    expect(payload[queryKey]).toHaveProperty(section);
  }
});

// The qf/qv.t form without a brevity segment answers 200 with a non-empty `items` list.
test('Bib API qf/qv.t htid', async ({ page }) => {
  const noBrevityUrl = `/api/volumes/htid/${test_htid}.json`;
  const response = await page.goto(noBrevityUrl);
  expect(response.status()).toBe(200);

  const payload = await response.json();
  const itemCount = payload.items.length;
  expect(itemCount).toBeGreaterThan(0);
});
38 changes: 28 additions & 10 deletions static/api/volumes.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
// Bail immediately if there's no query

// print_r($_REQUEST);

// This may not be possible without a 404 from the current rewrite rules.
if (!isset($_REQUEST['q']) || !preg_match('/\S/', $_REQUEST['q'])) {
    // `q` is absent or contains only whitespace: answer 400 with a JSON error
    // body. The status and content-type headers are sent before any output,
    // and the script stops only after the message has been written.
    header("HTTP/1.0 400 Malformed");
    header('Content-type: application/json; charset=UTF-8');
    echo json_encode(['message' => 'missing or empty query']);
    exit();
}

require_once 'PEAR.php';
Expand Down Expand Up @@ -81,7 +85,14 @@
'rows' => 200
);

// Set brevity to the default value of "brief" as brevity is not always
// provided by the rewrite rules in .htaccess. Any value other than "full"
// (including a missing parameter) is treated as "brief".
$brevity = 'brief';
if (isset($_REQUEST['brevity']) && $_REQUEST['brevity'] === 'full') {
    $brevity = 'full';
}

// "full" brevity additionally requests the `fullrecord` field.
if ($brevity === 'full') {
    $commonargs['fl'] = $commonargs['fl'] . ',fullrecord';
}

Expand All @@ -104,18 +115,20 @@
class QObj
{
public $string;
public $brevity;
private $_id;
public $tspecs = array(); # Transformed specs
public $qspecs = array(); # Query specs for solr
public $matches = array(); # Matching document IDs



function __construct($str) {
function __construct($str, $brevity) {
global $validField;
global $fieldmap;

$this->string = $str;
$this->brevity = $brevity;

$specs = explode(';', $str);
foreach ($specs as $spec) {
Expand All @@ -127,7 +140,7 @@ function __construct($str) {

if ($field == 'id') {
$this->_id = $fv[1];
continue;
continue;
}
if (!isset($fv[1])) {
continue;
Expand Down Expand Up @@ -266,7 +279,7 @@ function recordsStructure($docs) {
$rinfo[$index . 's'] = array();
}
}
if ($_REQUEST['brevity'] == 'full') {
if ($this->brevity == 'full') {
$rinfo['marc-xml'] = $doc['fullrecord'];
}
$records[$docid] = $rinfo;
Expand Down Expand Up @@ -294,7 +307,7 @@ function itemsStructure($docs) {
$iinfo['itemURL'] = "https://babel.hathitrust.org/cgi/pt?id=" . $htid;

$rc = isset($ht['rights']) ? $ht['rights'] : 'ic';
$rc = is_array($rc) ? $rc[0] : $rc;
$rc = is_array($rc) ? $rc[0] : $rc;
$iinfo['rightsCode'] = $rc;
$iinfo['lastUpdate'] = $ht['ingest'];
$iinfo['enumcron'] = (isset($ht['enumcron']) && preg_match('/\S/', $ht['enumcron']))? $ht['enumcron'] : false;
Expand All @@ -317,7 +330,7 @@ function itemsStructure($docs) {


// Build one QObj per query string, collect its Solr query specs, and index
// the object by its original query string.
foreach ($qstrings as $qstring) {
    // Construct each QObj exactly once, passing the resolved brevity.
    $nqo = new QObj($qstring, $brevity);
    $solrQueryComponents = array_merge($solrQueryComponents, $nqo->qspecs);
    $qobjs[$qstring] = $nqo;
}
Expand All @@ -331,7 +344,8 @@ function itemsStructure($docs) {
if (!preg_match('/\S/', $q)) {
    // The query is empty or whitespace-only: answer 400 with a single JSON
    // error body. Both headers are sent before any output is written.
    header('HTTP/1.1 400 Bad Request');
    header('Content-type: application/json; charset=UTF-8');
    $origQuery = htmlspecialchars($origQuery);
    echo json_encode(['message' => "query '$origQuery' is invalid"]);
    exit();
}

Expand Down Expand Up @@ -387,12 +401,16 @@ function itemsStructure($docs) {

if ($_REQUEST['type'] == 'json') {
if (isset($allmatches['records']) && count($allmatches['records']) == 0) {
header('Content-type: application/json; charset=UTF-8');
// This is a hack to get the correct JSON representation of the empty `records` hash.
// Empty records can still show up serialized as "[]" in multi-record results,
// but this takes care of the simplest case.
echo "{\n \"records\": {}, \"items\": []\n}";
exit;
} else {
$json = json_encode($allmatches);
if (isset($_REQUEST['callback'])) {
header('Content-type: application/javascript; charset=UTF-8');
header('Content-type: application/javascript; charset=UTF-8');
echo $_REQUEST['callback'] . "( $json)";
} else {
header('Content-type: application/json; charset=UTF-8');
Expand Down