From b85f3fb246578c1e6ab60b319f7d1609d40d751d Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 12:25:28 -0500 Subject: [PATCH 01/36] Move esRangeValue --- lib/elasticsearch/elastic-body-builder.js | 0 lib/resources.js | 27 +-------------------- lib/utils/resource-helpers.js | 29 +++++++++++++++++++++++ 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 lib/elasticsearch/elastic-body-builder.js create mode 100644 lib/utils/resource-helpers.js diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js new file mode 100644 index 00000000..e69de29b diff --git a/lib/resources.js b/lib/resources.js index 57532f96..5c51f029 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -22,6 +22,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') +const { esRangeValue } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -395,32 +396,6 @@ module.exports = function (app, _private = null) { return body } - /** - * Given a range represented as an array, returns a corresponding ES range object - * - * @param {Array.} range - An array consisting of a single date or a pair of dates - * @returns {object} - */ - const esRangeValue = (range) => { - // the greater-than-equal value will always be the first value in the range array. - // depending on the number of values and their equality, we query using less-than-equal - // the second value, or just less-than the first value plus one - - // Treat case where range start equals range end same as case of single value: - if (range[0] === range[1]) range = range.slice(0, 1) - const rangeQuery = { - gte: range[0] - } - if (range.length === 2) { - // search on both range values - rangeQuery.lte = range[range.length - 1] - } else if (range.length === 1) { - // if there is just one range, query up until the next year - rangeQuery.lt = range[0] + 1 - } - return rangeQuery - } - /** * Given an object containing filters, * returns content of the ES query filter context diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js new file mode 100644 index 00000000..fb535c1b --- /dev/null +++ b/lib/utils/resource-helpers.js @@ -0,0 +1,29 @@ +/** + * Given a range represented as an array, returns a corresponding ES range object + * + * @param {Array.} range - An array consisting of a single date or a pair of dates + * @returns {object} + */ +const esRangeValue = (range) => { + // the greater-than-equal value will always be the first value in the range array. + // depending on the number of values and their equality, we query using less-than-equal + // the second value, or just less-than the first value plus one + + // Treat case where range start equals range end same as case of single value: + if (range[0] === range[1]) range = range.slice(0, 1) + const rangeQuery = { + gte: range[0] + } + if (range.length === 2) { + // search on both range values + rangeQuery.lte = range[range.length - 1] + } else if (range.length === 1) { + // if there is just one range, query up until the next year + rangeQuery.lt = range[0] + 1 + } + return rangeQuery +} + +module.exports = { + esRangeValue +} From a02a058f0f83a2605de2daf47d01a3fdc831dae6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 12:32:24 -0500 Subject: [PATCH 02/36] Move parseParams --- lib/elasticsearch/config.js | 41 ++++++++++++++++++++- lib/resources.js | 69 +---------------------------------- lib/utils/resource-helpers.js | 35 +++++++++++++++++- 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index b63c3545..18ef399c 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -129,8 +129,47 @@ const AGGREGATIONS_SPEC = { collection: { terms: { field: 'collectionIds' } } } +const ITEM_FILTER_AGGREGATIONS = { + item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, + item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, + item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } +} + +// Configure sort fields: +const SORT_FIELDS = { + title: { + initialDirection: 'asc', + field: 'title_sort' + }, + date: { + initialDirection: 'desc', + field: 'dateStartYear' + }, + creator: { + initialDirection: 'asc', + field: 'creator_sort' + }, + relevance: {} +} + +// The following fields can be excluded from ES responses because we don't pass them to client: +const EXCLUDE_FIELDS = [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + // Hide contributor and creator transformed fields: + '*WithoutDates', + '*Normalized' +] + module.exports = { SEARCH_SCOPES, FILTER_CONFIG, - AGGREGATIONS_SPEC + AGGREGATIONS_SPEC, + ITEM_FILTER_AGGREGATIONS, + EXCLUDE_FIELDS, + SORT_FIELDS } diff --git a/lib/resources.js b/lib/resources.js index 5c51f029..319b9836 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,78 +19,13 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX -const ITEM_FILTER_AGGREGATIONS = { - item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, - item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, - item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } -} - -// Configure sort fields: -const SORT_FIELDS = { - title: { - initialDirection: 'asc', - field: 'title_sort' - }, - date: { - initialDirection: 'desc', - field: 'dateStartYear' - }, - creator: { - initialDirection: 'asc', - field: 'creator_sort' - }, - relevance: {} -} - -// The following fields can be excluded from ES responses because we don't pass them to client: -const EXCLUDE_FIELDS = [ - 'uris', - '*_packed', - '*_sort', - 'items.*_packed', - 'contentsTitle', - 'suppressed', - // Hide contributor and creator transformed fields: - '*WithoutDates', - '*Normalized' -] - -// Configure controller-wide parameter parsing: -const parseSearchParams = function (params, overrideParams = {}) { - return parseParams(params, { - q: { type: 'string' }, - page: { type: 'int', default: 1 }, - per_page: { type: 'int', default: 50, range: [0, 100] }, - field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, - sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, - sort_direction: { type: 'string', range: ['asc', 'desc'] }, - search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, - filters: { type: 'hash', fields: FILTER_CONFIG }, - items_size: { type: 'int', default: 100, range: [0, 200] }, - items_from: { type: 'int', default: 0 }, - callnumber: { type: 'string' }, - standard_number: { type: 'string' }, - contributor: { type: 'string' }, - title: { type: 'string' }, - subject: { type: 'string' }, - subject_prefix: { type: 'string' }, - isbn: { type: 'string' }, - issn: { type: 'string' }, - lccn: { type: 'string' }, - oclc: { type: 'string' }, - merge_checkin_card_items: { type: 'boolean', default: true }, - include_item_aggregations: { type: 'boolean', default: true }, - ...overrideParams - }) -} - // These are the handlers made available to the router: module.exports = function (app, _private = null) { app.resources = {} diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index fb535c1b..28ae2993 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -1,3 +1,6 @@ +const { parseParams } = require('../util') +const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') + /** * Given a range represented as an array, returns a corresponding ES range object * @@ -24,6 +27,36 @@ const esRangeValue = (range) => { return rangeQuery } +// Configure controller-wide parameter parsing: +const parseSearchParams = function (params, overrideParams = {}) { + return parseParams(params, { + q: { type: 'string' }, + page: { type: 'int', default: 1 }, + per_page: { type: 'int', default: 50, range: [0, 100] }, + field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, + sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, + sort_direction: { type: 'string', range: ['asc', 'desc'] }, + search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, + filters: { type: 'hash', fields: FILTER_CONFIG }, + items_size: { type: 'int', default: 100, range: [0, 200] }, + items_from: { type: 'int', default: 0 }, + callnumber: { type: 'string' }, + standard_number: { type: 'string' }, + contributor: { type: 'string' }, + title: { type: 'string' }, + subject: { type: 'string' }, + subject_prefix: { type: 'string' }, + isbn: { type: 'string' }, + issn: { type: 'string' }, + lccn: { type: 'string' }, + oclc: { type: 'string' }, + merge_checkin_card_items: { type: 'boolean', default: true }, + include_item_aggregations: { type: 'boolean', default: true }, + ...overrideParams + }) +} + module.exports = { - esRangeValue + esRangeValue, + parseSearchParams } From 0beec4c3a5aa96a447ffcd54a38bafa4ff91b626 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:17:15 -0500 Subject: [PATCH 03/36] Refactor nyplSourc/id calculation and move nyplSourceAndId to utils' --- lib/resources.js | 8 ++------ lib/utils/resource-helpers.js | 14 +++++++++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 319b9836..6c5d1f70 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -22,7 +22,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -49,11 +49,7 @@ module.exports = function (app, _private = null) { }) // Validate uri: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + await nyplSourceAndId(params) // If we need to return itemAggregations or filter on item_status, // then we need to pre-retrieve SCSB item statuses to incorporate them into diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 28ae2993..42629299 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -1,5 +1,7 @@ const { parseParams } = require('../util') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') +const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') +const errors = require('../errors') /** * Given a range represented as an array, returns a corresponding ES range object @@ -56,7 +58,17 @@ const parseSearchParams = function (params, overrideParams = {}) { }) } +const nyplSourceAndId = async function (params) { + const nyplSourceMapper = await NyplSourceMapper.instance() + const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} + if (!id || !nyplSource) { + throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) + } + return { id, nyplSource } +} + module.exports = { esRangeValue, - parseSearchParams + parseSearchParams, + nyplSourceAndId } From a7498788f3fcc8dcdd12c568c2b47c25043c67e3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:50:50 -0500 Subject: [PATCH 04/36] Add bodybuilder methods for findByUri --- lib/elasticsearch/elastic-body-builder.js | 329 ++++++++++++++++++++++ lib/resources.js | 321 +-------------------- 2 files changed, 334 insertions(+), 316 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e69de29b..655ca1c9 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -0,0 +1,329 @@ +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS } = require('./config') +const { deepValue } = require('../util') +const { esRangeValue } = require('../utils/resource-helpers') + +/** + * Given a ES search body, returns same object modified to include the + * additional query necessary to limit (and paginate through) items + * + * @param {object} body - An ES query object (suitable for POSTing to ES + * @param {object} options - An object optionally defining `size` and `from` + * for limiting and paginating through items + */ +const addInnerHits = (body, _options = {}) => { + const options = Object.assign({ + size: process.env.SEARCH_ITEMS_SIZE || 200, + from: 0, + merge_checkin_card_items: true + }, _options) + + // Make sure necessary structure exists: + if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { + body.query = { bool: {} } + } + + // The place to add the filter depends on the query built to this point: + const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) + // Initialize filter object if it doesn't already exist: + placeToAddFilter.filter = placeToAddFilter.filter || [] + // If filter object already exists, convert it to array: + if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] + + const itemsQuery = { + bool: Object.assign( + itemsQueryContext(options), + itemsFilterContext(options) + ) + } + + const wrappedItemsQuery = { + bool: { + should: [ + { + nested: { + path: 'items', + query: itemsQuery, + inner_hits: { + sort: [{ 'items.enumerationChronology_sort': 'desc' }], + size: options.size, + from: options.from, + name: 'items' + } + } + }, + // Add a catch-all to ensure we return the bib document even when + // numItems=0 or applied item filters exclude all items: + { match_all: {} } + ] + } + } + placeToAddFilter.filter.push(wrappedItemsQuery) + + // If there is any item query at all, run an additional inner_hits query + // to retrieve the total number of items without filtering: + if (itemsQuery.bool.filter) { + wrappedItemsQuery.bool.should.push({ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }) + } + + return body +} + +/** + * Given an object containing filters, + * returns content of the ES query filter context + * + * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] + * @returns {object} + */ +const itemsFilterContext = (options) => { + if (!options.query) return {} + + const filterHandlers = { + volume: (volumes) => { + return { + range: { + 'items.volumeRange': esRangeValue(volumes) + } + } + }, + date: (dates) => { + return { + range: { + 'items.dateRange': esRangeValue(dates) + } + } + }, + format: (formats) => { + return { + terms: { + 'items.formatLiteral': formats + } + } + }, + location: (locations) => { + return { + terms: { + 'items.holdingLocation.id': locations + } + } + }, + status: (statuses) => { + // Determine if all possible ReCAP statuses were selected: + const selectedRecapStatuses = recapStatuses(statuses) + + if (selectedRecapStatuses.length === 1 && + Array.isArray(options.unavailable_recap_barcodes) && + options.unavailable_recap_barcodes.length > 0) { + // There are known unavailable ReCAP items, so build a complicated + // filter clause with appropriate barcode overrides: + return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) + } else { + // If there are no known unavailable ReCAP items, just do a straight + // status match: + return { + terms: { + 'items.status.id': statuses + } + } + } + }, + itemUri: (uri) => { + return { term: { 'items.uri': uri } } + } + } + + const filters = Object.keys(options.query).map((filter) => { + const value = options.query[filter] + const handler = filterHandlers[filter] + return value && handler ? handler(value) : null + }).filter((x) => x) + + return filters.length + ? { filter: filters } + : {} +} + +/** + * Given an array of status ids (e.g. "status:a", "status:na") returns the + * subset of statuses that are relevant in ReCAP + */ +const recapStatuses = (statuses) => { + return statuses + .filter((status) => ['status:a', 'status:na'].includes(status)) +} + +/** + * Builds a big complicated ES filter to allow us to filter items by status, + * but override the indexed status for ReCAP items with statuses retrieved + * from SCSB. This corrects for the fact that ReCAP item statuses tend to be + * wrong in the ES index: + * - partner items are indexed as Available and remain thus forever + * - NYPL item statuses _should_ equal SCSB status, but the mechanism + * for keeping them synced isn't perfect and operates on a delay + * + * @param {string[]} statuses - An array of statuses to filter on + * @param {string[]} unavailableRecapBarcodes - An array of item barcodes + * known to be unavailble + * + * Returns an ES filter that matches the desired statuses, but also uses + * the known unavailable items to override indexed item statuses for ReCAP + * items (because ReCAP is the authority for status of off-site items). + * Essentially, the criteria is for matching an item is: + * + * - if on-site (non-ReCAP): + * - has a matching indexed status + * - if off-site: + * - if filtering on status:na + * - item barcode must be in unavailableRecapBarcodes + * - if filtering on status:a: + * - item barcode must NOT be in unavailableRecapBarcodes + */ +const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { + // First, let's set up some common clauses: + + // Item is in ReCAP: + const itemIsRecapClause = { + regexp: { 'items.holdingLocation.id': 'loc:rc.*' } + } + // Item's indexed status matches one of the filtered statuses: + const itemHasIndexedStatusClause = { + terms: { 'items.status.id': statuses } + } + // Item is marked Unavailable in SCSB: + const itemIsUnavailableInRecapClause = { + script: { + script: { + inline: 'doc[\'items.idBarcode\'].value == null || ' + + 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', + lang: 'painless', + params: { unavailableRecapBarcodes } + } + } + } + // This function is only called if `statuses` param contains a single + // ReCAP-relevant status (i.e. status:a or status:na), so determine which + // ReCAP status to use: + const selectedRecapStatus = recapStatuses(statuses).shift() + // Item's ReCAP status agrees with filter: + const itemRecapStatusAgreesWithFilterClause = + selectedRecapStatus === 'status:na' + ? itemIsUnavailableInRecapClause + : { bool: { must_not: itemIsUnavailableInRecapClause } } + + return { + bool: { + should: [ + // Either 1) item is on-site and has correctly indexed status: + { + bool: { + must: [ + // Item is on-site (i.e. not recap): + { bool: { must_not: itemIsRecapClause } }, + // Item indexed status matches filter: + itemHasIndexedStatusClause + ] + } + }, + // Or 2) item is off-site and has a scsb status that agrees with the + // filter (e.g. if filtering on status:na, scsb marks the barcode as + // 'Not Available') + { + bool: { + must: [ + // Item is off-site: + JSON.parse(JSON.stringify(itemIsRecapClause)), + // Item is not marked unavailable + itemRecapStatusAgreesWithFilterClause + ] + } + } + ] + } + } +} + +/** + * Given an object containing query options, + * returns content of the ES query context + * + * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one + * that matters right now + * @returns {object} + */ +const itemsQueryContext = (options) => { + const excludeClauses = [] + + if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) + + return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } +} + +const bodyForFindByUri = async function (recapBarcodesByStatus, params) { + // Establish base query: + let body = { + _source: { + excludes: EXCLUDE_FIELDS + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: params.uri + } + } + ] + } + } + } + const paramsIncludesItemLevelFiltering = Object.keys(params) + .filter((param) => param.startsWith('item_')).length > 0 + const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + if (returnAllItems) { + body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') + } else { + // No specific item requested, so add pagination and matching params: + const itemsOptions = { + size: params.items_size, + from: params.items_from, + merge_checkin_card_items: params.merge_checkin_card_items, + query: { + volume: params.item_volume, + date: params.item_date, + format: params.item_format, + location: params.item_location, + status: params.item_status, + itemUri: params.itemUri + }, + unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] + } + body = addInnerHits(body, itemsOptions) + body._source = { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + } + if (params.include_item_aggregations) { + body.aggregations = ITEM_FILTER_AGGREGATIONS + } + return body +} + +module.exports = { + bodyForFindByUri, + addInnerHits, + itemsFilterContext, + recapStatuses, + itemStatusFilterWithUnavailableRecapItems, + itemsQueryContext +} diff --git a/lib/resources.js b/lib/resources.js index 6c5d1f70..f0b47d37 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,14 +15,15 @@ const ResponseMassager = require('./response_massager.js') const DeliveryLocationsResolver = require('./delivery-locations-resolver') const AvailableDeliveryLocationTypes = require('./available_delivery_location_types') -const { parseParams, deepValue } = require('../lib/util') +const { parseParams } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') +const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -67,54 +68,8 @@ module.exports = function (app, _private = null) { : Promise.resolve({}) return scsbStatusLookup - .then((recapBarcodesByStatus) => { - // Establish base query: - let body = { - _source: { - excludes: EXCLUDE_FIELDS - }, - size: 1, - query: { - bool: { - must: [ - { - term: { - uri: params.uri - } - } - ] - } - } - } - const paramsIncludesItemLevelFiltering = Object.keys(params) - .filter((param) => param.startsWith('item_')).length > 0 - const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering - if (returnAllItems) { - body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') - } else { - // No specific item requested, so add pagination and matching params: - const itemsOptions = { - size: params.items_size, - from: params.items_from, - merge_checkin_card_items: params.merge_checkin_card_items, - query: { - volume: params.item_volume, - date: params.item_date, - format: params.item_format, - location: params.item_location, - status: params.item_status, - itemUri: params.itemUri - }, - unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] - } - body = addInnerHits(body, itemsOptions) - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - } - if (params.include_item_aggregations) { - body.aggregations = ITEM_FILTER_AGGREGATIONS - } + .then(async (recapBarcodesByStatus) => { + const body = await bodyForFindByUri(recapBarcodesByStatus, params) app.logger.debug('Resources#findByUri', body) return app.esClient.search(body) .then((resp) => { @@ -251,272 +206,6 @@ module.exports = function (app, _private = null) { .then((items) => ItemResultsSerializer.serialize(items, opts)) } - /** - * Given a ES search body, returns same object modified to include the - * additional query necessary to limit (and paginate through) items - * - * @param {object} body - An ES query object (suitable for POSTing to ES - * @param {object} options - An object optionally defining `size` and `from` - * for limiting and paginating through items - */ - const addInnerHits = (body, _options = {}) => { - const options = Object.assign({ - size: process.env.SEARCH_ITEMS_SIZE || 200, - from: 0, - merge_checkin_card_items: true - }, _options) - - // Make sure necessary structure exists: - if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { - body.query = { bool: {} } - } - - // The place to add the filter depends on the query built to this point: - const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) - // Initialize filter object if it doesn't already exist: - placeToAddFilter.filter = placeToAddFilter.filter || [] - // If filter object already exists, convert it to array: - if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] - - const itemsQuery = { - bool: Object.assign( - itemsQueryContext(options), - itemsFilterContext(options) - ) - } - - const wrappedItemsQuery = { - bool: { - should: [ - { - nested: { - path: 'items', - query: itemsQuery, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: options.size, - from: options.from, - name: 'items' - } - } - }, - // Add a catch-all to ensure we return the bib document even when - // numItems=0 or applied item filters exclude all items: - { match_all: {} } - ] - } - } - placeToAddFilter.filter.push(wrappedItemsQuery) - - // If there is any item query at all, run an additional inner_hits query - // to retrieve the total number of items without filtering: - if (itemsQuery.bool.filter) { - wrappedItemsQuery.bool.should.push({ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }) - } - - return body - } - - /** - * Given an object containing filters, - * returns content of the ES query filter context - * - * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] - * @returns {object} - */ - const itemsFilterContext = (options) => { - if (!options.query) return {} - - const filterHandlers = { - volume: (volumes) => { - return { - range: { - 'items.volumeRange': esRangeValue(volumes) - } - } - }, - date: (dates) => { - return { - range: { - 'items.dateRange': esRangeValue(dates) - } - } - }, - format: (formats) => { - return { - terms: { - 'items.formatLiteral': formats - } - } - }, - location: (locations) => { - return { - terms: { - 'items.holdingLocation.id': locations - } - } - }, - status: (statuses) => { - // Determine if all possible ReCAP statuses were selected: - const selectedRecapStatuses = recapStatuses(statuses) - - if (selectedRecapStatuses.length === 1 && - Array.isArray(options.unavailable_recap_barcodes) && - options.unavailable_recap_barcodes.length > 0) { - // There are known unavailable ReCAP items, so build a complicated - // filter clause with appropriate barcode overrides: - return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) - } else { - // If there are no known unavailable ReCAP items, just do a straight - // status match: - return { - terms: { - 'items.status.id': statuses - } - } - } - }, - itemUri: (uri) => { - return { term: { 'items.uri': uri } } - } - } - - const filters = Object.keys(options.query).map((filter) => { - const value = options.query[filter] - const handler = filterHandlers[filter] - return value && handler ? handler(value) : null - }).filter((x) => x) - - return filters.length - ? { filter: filters } - : {} - } - - /** - * Given an array of status ids (e.g. "status:a", "status:na") returns the - * subset of statuses that are relevant in ReCAP - */ - const recapStatuses = (statuses) => { - return statuses - .filter((status) => ['status:a', 'status:na'].includes(status)) - } - - /** - * Builds a big complicated ES filter to allow us to filter items by status, - * but override the indexed status for ReCAP items with statuses retrieved - * from SCSB. This corrects for the fact that ReCAP item statuses tend to be - * wrong in the ES index: - * - partner items are indexed as Available and remain thus forever - * - NYPL item statuses _should_ equal SCSB status, but the mechanism - * for keeping them synced isn't perfect and operates on a delay - * - * @param {string[]} statuses - An array of statuses to filter on - * @param {string[]} unavailableRecapBarcodes - An array of item barcodes - * known to be unavailble - * - * Returns an ES filter that matches the desired statuses, but also uses - * the known unavailable items to override indexed item statuses for ReCAP - * items (because ReCAP is the authority for status of off-site items). - * Essentially, the criteria is for matching an item is: - * - * - if on-site (non-ReCAP): - * - has a matching indexed status - * - if off-site: - * - if filtering on status:na - * - item barcode must be in unavailableRecapBarcodes - * - if filtering on status:a: - * - item barcode must NOT be in unavailableRecapBarcodes - */ - const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { - // First, let's set up some common clauses: - - // Item is in ReCAP: - const itemIsRecapClause = { - regexp: { 'items.holdingLocation.id': 'loc:rc.*' } - } - // Item's indexed status matches one of the filtered statuses: - const itemHasIndexedStatusClause = { - terms: { 'items.status.id': statuses } - } - // Item is marked Unavailable in SCSB: - const itemIsUnavailableInRecapClause = { - script: { - script: { - inline: 'doc[\'items.idBarcode\'].value == null || ' + - 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', - lang: 'painless', - params: { unavailableRecapBarcodes } - } - } - } - // This function is only called if `statuses` param contains a single - // ReCAP-relevant status (i.e. status:a or status:na), so determine which - // ReCAP status to use: - const selectedRecapStatus = recapStatuses(statuses).shift() - // Item's ReCAP status agrees with filter: - const itemRecapStatusAgreesWithFilterClause = - selectedRecapStatus === 'status:na' - ? itemIsUnavailableInRecapClause - : { bool: { must_not: itemIsUnavailableInRecapClause } } - - return { - bool: { - should: [ - // Either 1) item is on-site and has correctly indexed status: - { - bool: { - must: [ - // Item is on-site (i.e. not recap): - { bool: { must_not: itemIsRecapClause } }, - // Item indexed status matches filter: - itemHasIndexedStatusClause - ] - } - }, - // Or 2) item is off-site and has a scsb status that agrees with the - // filter (e.g. if filtering on status:na, scsb marks the barcode as - // 'Not Available') - { - bool: { - must: [ - // Item is off-site: - JSON.parse(JSON.stringify(itemIsRecapClause)), - // Item is not marked unavailable - itemRecapStatusAgreesWithFilterClause - ] - } - } - ] - } - } - } - - /** - * Given an object containing query options, - * returns content of the ES query context - * - * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one - * that matters right now - * @returns {object} - */ - const itemsQueryContext = (options) => { - const excludeClauses = [] - - if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) - - return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } - } - // Conduct a search across resources: app.resources.search = function (params, opts, request) { app.logger.debug('Unparsed params: ', params) From 683445f6c3d4103b3ad0b3b50ad46d8b87f034f0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:52:49 -0500 Subject: [PATCH 05/36] Add nyplSourceAndId call to annotatedMarc --- lib/resources.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index f0b47d37..b6726bfa 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -1,4 +1,3 @@ -const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const scsbClient = require('./scsb-client') const ResourceResultsSerializer = require('./jsonld_serializers.js').ResourceResultsSerializer @@ -97,12 +96,8 @@ module.exports = function (app, _private = null) { // Get a single raw annotated-marc resource: app.resources.annotatedMarc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + const { id, nyplSource } = await nyplSourceAndId(params) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) From 5ac57ee6c2ce65b1a2211e021e9f7c317e7db003 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:17:23 -0500 Subject: [PATCH 06/36] Move itemsByFilter to utils --- lib/resources.js | 54 ++--------------------------------- lib/utils/resource-helpers.js | 51 ++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 53 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index b6726bfa..82f9e112 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -5,7 +5,6 @@ const ResourceSerializer = require('./jsonld_serializers.js').ResourceSerializer const AggregationsSerializer = require('./jsonld_serializers.js').AggregationsSerializer const AggregationSerializer = require('./jsonld_serializers.js').AggregationSerializer const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer -const LocationLabelUpdater = require('./location_label_updater') const AnnotatedMarcSerializer = require('./annotated-marc-serializer') const { makeNyplDataApiClient } = require('./data-api-client') const { IndexSearchError, IndexConnectionError } = require('./errors') @@ -21,7 +20,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -113,46 +112,6 @@ module.exports = function (app, _private = null) { .then(AnnotatedMarcSerializer.serialize) } - function itemsByFilter (filter, opts) { - opts = Object.assign({ - _source: null - }, opts) - - // Build ES query body: - const body = { - query: { - nested: { - path: 'items', - score_mode: 'avg', - query: { - constant_score: { - filter - } - } - } - } - } - if (opts._source) body._source = opts._source - - app.logger.debug('Resources#itemsByFilter', body) - return app.esClient.search(body) - .then((resp) => { - if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) - resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() - // Convert this ES bibs response into an array of flattened items: - return resp.hits.hits - .map((doc) => doc._source) - // Reduce to a flat array of items - .reduce((a, bib) => { - return a.concat(bib.items) - // Let's affix that bnum into the item's identifiers so we know where it came from: - .map((i) => { - return Object.assign(i, { identifier: [`urn:bnum:${bib.uri}`].concat(i.identifier) }) - }) - }, []) - }) - } - // Get deliveryLocations for given resource(s) app.resources.deliveryLocationsByBarcode = function (params, opts) { params = parseParams(params, { @@ -170,16 +129,7 @@ module.exports = function (app, _private = null) { }) // Create promise to resolve items: - const fetchItems = itemsByFilter( - { terms: { 'items.identifier': identifierValues } }, - { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } - - // Filter out any items (multi item bib) that don't match one of the queriered barcodes: - ).then((items) => { - return items.filter((item) => { - return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 - }) - }) + const fetchItems = itemsByFilter(identifierValues, app) // Run both item fetch and patron fetch in parallel: return Promise.all([fetchItems, lookupPatronType]) diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 42629299..e87d6e79 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -2,6 +2,7 @@ const { parseParams } = require('../util') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const errors = require('../errors') +const LocationLabelUpdater = require('../location_label_updater') /** * Given a range represented as an array, returns a corresponding ES range object @@ -67,8 +68,56 @@ const nyplSourceAndId = async function (params) { return { id, nyplSource } } +function itemsByFilter (identifierValues, app) { + const filter = { terms: { 'items.identifier': identifierValues } } + let opts = { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } + + opts = Object.assign({ + _source: null + }, opts) + + // Build ES query body: + const body = { + query: { + nested: { + path: 'items', + score_mode: 'avg', + query: { + constant_score: { + filter + } + } + } + } + } + if (opts._source) body._source = opts._source + + app.logger.debug('Resources#itemsByFilter', body) + return app.esClient.search(body) + .then((resp) => { + if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) + resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() + // Convert this ES bibs response into an array of flattened items: + return resp.hits.hits + .map((doc) => doc._source) + // Reduce to a flat array of items + .reduce((a, bib) => { + return a.concat(bib.items) + // Let's affix that bnum into the item's identifiers so we know where it came from: + .map((i) => { + return Object.assign(i, { identifier: [`urn:bnum:${bib.uri}`].concat(i.identifier) }) + }) + }, []) + }).then((items) => { + return items.filter((item) => { + return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 + }) + }) +} + module.exports = { esRangeValue, parseSearchParams, - nyplSourceAndId + nyplSourceAndId, + itemsByFilter } From d50ea4570c9e5e7a8513a31085e2207f88505f36 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:21:40 -0500 Subject: [PATCH 07/36] Move buildElasticQuery/Body to bodybuilder --- lib/elasticsearch/elastic-body-builder.js | 54 ++++++++++++++++++++- lib/resources.js | 59 ++++------------------- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 655ca1c9..77bcb210 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,8 @@ -const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS } = require('./config') +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') +const ApiRequest = require('../api-request') +const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') /** * Given a ES search body, returns same object modified to include the @@ -319,11 +321,59 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { return body } +/** + * Given GET params, returns a plainobject suitable for use in a ES query. + * + * @param {object} params - A hash of request params including `filters`, + * `search_scope`, `q` + * + * @return {object} ES query object suitable to be POST'd to ES endpoint + */ +const buildElasticQuery = function (params) { + const request = ApiRequest.fromParams(params) + + const builder = ElasticQueryBuilder.forApiRequest(request) + return builder.query.toJson() +} + +/** + * Given GET params, returns a plainobject with `from`, `size`, `query`, + * `sort`, and any other params necessary to perform the ES query based + * on the GET params. + * + * @return {object} An object that can be posted directly to ES + */ +const buildElasticBody = function (params) { + const body = { + from: (params.per_page * (params.page - 1)), + size: params.per_page + } + + body.query = buildElasticQuery(params) + + // Apply sort: + let direction + let field + + if (params.sort === 'relevance') { + field = '_score' + direction = 'desc' + } else { + field = SORT_FIELDS[params.sort].field || params.sort + direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection + } + body.sort = [{ [field]: direction }, { uri: 'asc' }] + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, itemsFilterContext, recapStatuses, itemStatusFilterWithUnavailableRecapItems, - itemsQueryContext + itemsQueryContext, + buildElasticQuery, + buildElasticBody } diff --git a/lib/resources.js b/lib/resources.js index 82f9e112..fbe3dc78 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,13 +15,18 @@ const AvailableDeliveryLocationTypes = require('./available_delivery_location_ty const { parseParams } = require('../lib/util') -const ApiRequest = require('./api-request') -const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') -const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') +const { + bodyForFindByUri, + addInnerHits, + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody +} = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -359,49 +364,3 @@ module.exports = function (app, _private = null) { _private.mergeAggregationsResponses = mergeAggregationsResponses } } - -/** - * Given GET params, returns a plainobject with `from`, `size`, `query`, - * `sort`, and any other params necessary to perform the ES query based - * on the GET params. - * - * @return {object} An object that can be posted directly to ES - */ -const buildElasticBody = function (params) { - const body = { - from: (params.per_page * (params.page - 1)), - size: params.per_page - } - - body.query = buildElasticQuery(params) - - // Apply sort: - let direction - let field - - if (params.sort === 'relevance') { - field = '_score' - direction = 'desc' - } else { - field = SORT_FIELDS[params.sort].field || params.sort - direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection - } - body.sort = [{ [field]: direction }, { uri: 'asc' }] - - return body -} - -/** - * Given GET params, returns a plainobject suitable for use in a ES query. - * - * @param {object} params - A hash of request params including `filters`, - * `search_scope`, `q` - * - * @return {object} ES query object suitable to be POST'd to ES endpoint - */ -const buildElasticQuery = function (params) { - const request = ApiRequest.fromParams(params) - - const builder = ElasticQueryBuilder.forApiRequest(request) - return builder.query.toJson() -} From 2f6c8875cca34e19d2b31ecb27f3fda093a36baf Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:24:08 -0500 Subject: [PATCH 08/36] Factor out body for search --- lib/elasticsearch/elastic-body-builder.js | 16 +++++++++++++++- lib/resources.js | 14 ++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 77bcb210..e77ba2ff 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -367,6 +367,19 @@ const buildElasticBody = function (params) { return body } +const bodyForSearch = function (params) { + let body = buildElasticBody(params) + + // Strip unnecessary _source fields + body._source = { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -375,5 +388,6 @@ module.exports = { itemStatusFilterWithUnavailableRecapItems, itemsQueryContext, buildElasticQuery, - buildElasticBody + buildElasticBody, + bodyForSearch } diff --git a/lib/resources.js b/lib/resources.js index fbe3dc78..18646abb 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,7 +15,7 @@ const AvailableDeliveryLocationTypes = require('./available_delivery_location_ty const { parseParams } = require('../lib/util') -const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') @@ -25,7 +25,8 @@ const { itemsFilterContext, itemsQueryContext, buildElasticQuery, - buildElasticBody + buildElasticBody, + bodyForSearch } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -163,14 +164,7 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) - let body = buildElasticBody(params) - - // Strip unnecessary _source fields - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + const body = bodyForSearch(params) app.logger.debug('Resources#search', RESOURCES_INDEX, body) From 1075d65e8dbbff84316dc071202d69887735265a Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:30:53 -0500 Subject: [PATCH 09/36] Move buildElasticAggregationsBody --- lib/elasticsearch/elastic-body-builder.js | 25 +++++++++++++++++++++-- lib/resources.js | 23 ++------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e77ba2ff..57f87389 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,4 +1,4 @@ -const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS } = require('./config') +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') const ApiRequest = require('../api-request') @@ -380,6 +380,26 @@ const bodyForSearch = function (params) { return body } +const buildElasticAggregationsBody = (params, aggregateProps) => { + // Add an `aggregations` entry to the ES body describing the aggretations + // we want. Set the `size` property to per_page (default 50) for each. + // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size + const aggregations = aggregateProps.reduce((aggs, prop) => { + aggs[prop] = AGGREGATIONS_SPEC[prop] + // Only set size for terms aggs for now: + if (aggs[prop].terms) { + aggs[prop].terms.size = params.per_page + } + return aggs + }, {}) + + const body = buildElasticBody(params) + body.size = 0 + body.aggregations = aggregations + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -389,5 +409,6 @@ module.exports = { itemsQueryContext, buildElasticQuery, buildElasticBody, - bodyForSearch + bodyForSearch, + buildElasticAggregationsBody } diff --git a/lib/resources.js b/lib/resources.js index 18646abb..2fdd8e96 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -26,7 +26,8 @@ const { itemsQueryContext, buildElasticQuery, buildElasticBody, - bodyForSearch + bodyForSearch, + buildElasticAggregationsBody } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -207,26 +208,6 @@ module.exports = function (app, _private = null) { }) } - const buildElasticAggregationsBody = (params, aggregateProps) => { - // Add an `aggregations` entry to the ES body describing the aggretations - // we want. Set the `size` property to per_page (default 50) for each. - // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size - const aggregations = aggregateProps.reduce((aggs, prop) => { - aggs[prop] = AGGREGATIONS_SPEC[prop] - // Only set size for terms aggs for now: - if (aggs[prop].terms) { - aggs[prop].terms.size = params.per_page - } - return aggs - }, {}) - - const body = buildElasticBody(params) - body.size = 0 - body.aggregations = aggregations - - return body - } - /** * Given a params hash, returns an array of ES queries for fetching relevant aggregations. */ From a0237b707472f9e902f8daa316fad2d011a2c7e4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:33:10 -0500 Subject: [PATCH 10/36] Move aggregationQueriesForParams to bodybuilder --- lib/elasticsearch/elastic-body-builder.js | 36 ++++++++++++++++++++++- lib/resources.js | 35 +--------------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 57f87389..42a69ae7 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -400,6 +400,39 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return body } +/** +* Given a params hash, returns an array of ES queries for fetching relevant aggregations. +*/ +const aggregationQueriesForParams = (params) => { + // Build the complete set of distinct aggregation queries we need to run + // depending on active filters. We want: + // - one agg representing the counts for all properties _not_ used in filter + // - one agg each for each property that is used in a filter, but counts should exclude that filter + + // Build the standard aggregation: + const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) + // Aggregate on all properties that aren't involved in filters: + .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) + const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] + + // Now append all property-specific aggregation queries (one for each + // distinct property used in a filter): + return queries.concat( + Object.entries(params.filters || {}) + // Only consider filters that are also aggregations: + .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) + .map(([prop, values]) => { + const aggFilters = structuredClone(params.filters) + // For this aggregation, don't filter on namesake property: + delete aggFilters[prop] + + // Build query for single aggregation: + const modifiedParams = Object.assign({}, params, { filters: aggFilters }) + return buildElasticAggregationsBody(modifiedParams, [prop]) + }) + ) +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -410,5 +443,6 @@ module.exports = { buildElasticQuery, buildElasticBody, bodyForSearch, - buildElasticAggregationsBody + buildElasticAggregationsBody, + aggregationQueriesForParams } diff --git a/lib/resources.js b/lib/resources.js index 2fdd8e96..e4ecf72a 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -27,7 +27,7 @@ const { buildElasticQuery, buildElasticBody, bodyForSearch, - buildElasticAggregationsBody + aggregationQueriesForParams } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -208,39 +208,6 @@ module.exports = function (app, _private = null) { }) } - /** - * Given a params hash, returns an array of ES queries for fetching relevant aggregations. - */ - const aggregationQueriesForParams = (params) => { - // Build the complete set of distinct aggregation queries we need to run - // depending on active filters. We want: - // - one agg representing the counts for all properties _not_ used in filter - // - one agg each for each property that is used in a filter, but counts should exclude that filter - - // Build the standard aggregation: - const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) - // Aggregate on all properties that aren't involved in filters: - .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) - const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] - - // Now append all property-specific aggregation queries (one for each - // distinct property used in a filter): - return queries.concat( - Object.entries(params.filters || {}) - // Only consider filters that are also aggregations: - .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) - .map(([prop, values]) => { - const aggFilters = structuredClone(params.filters) - // For this aggregation, don't filter on namesake property: - delete aggFilters[prop] - - // Build query for single aggregation: - const modifiedParams = Object.assign({}, params, { filters: aggFilters }) - return buildElasticAggregationsBody(modifiedParams, [prop]) - }) - ) - } - /** * Given an array of ES aggregations responses (such as that returned from msearch) **/ From 3340f5e4f13296e5545c66d1fbbe3b3930f8b903 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:34:51 -0500 Subject: [PATCH 11/36] Move mergeAggregationsResposes to utils --- lib/resources.js | 36 ++++++++--------------------------- lib/utils/resource-helpers.js | 30 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index e4ecf72a..5082fc1d 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -18,7 +18,14 @@ const { parseParams } = require('../lib/util') const { AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') +const { + esRangeValue, + parseSearchParams, + nyplSourceAndId, + itemsByFilter, + mergeAggregationsResponses +} = require('./utils/resource-helpers') + const { bodyForFindByUri, addInnerHits, @@ -208,33 +215,6 @@ module.exports = function (app, _private = null) { }) } - /** - * Given an array of ES aggregations responses (such as that returned from msearch) - **/ - const mergeAggregationsResponses = (responses) => { - // Filter out errored responses: - responses = responses.filter((resp) => resp.aggregations) - if (responses.length === 0) { - return {} - } - return { - // Use `hits` of last element, somewhat arbitrarily: - hits: responses[responses.length - 1].hits, - aggregations: responses - .reduce((allAggs, resp) => { - const respAggs = Object.entries(resp.aggregations) - // Build hash of response aggs, squashing _nested aggs: - .reduce((a, [field, _a]) => { - // If it's nested, it will be in our special '_nested' prop: - a[field] = _a._nested || _a - return a - }, {}) - // Add response aggs to combined aggs: - return Object.assign(allAggs, respAggs) - }, {}) - } - } - // Get all aggregations: app.resources.aggregations = async (params, opts) => { params = parseSearchParams(params) diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index e87d6e79..4b587a2d 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -115,9 +115,37 @@ function itemsByFilter (identifierValues, app) { }) } +/** +* Given an array of ES aggregations responses (such as that returned from msearch) +**/ +const mergeAggregationsResponses = (responses) => { + // Filter out errored responses: + responses = responses.filter((resp) => resp.aggregations) + if (responses.length === 0) { + return {} + } + return { + // Use `hits` of last element, somewhat arbitrarily: + hits: responses[responses.length - 1].hits, + aggregations: responses + .reduce((allAggs, resp) => { + const respAggs = Object.entries(resp.aggregations) + // Build hash of response aggs, squashing _nested aggs: + .reduce((a, [field, _a]) => { + // If it's nested, it will be in our special '_nested' prop: + a[field] = _a._nested || _a + return a + }, {}) + // Add response aggs to combined aggs: + return Object.assign(allAggs, respAggs) + }, {}) + } +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, - itemsByFilter + itemsByFilter, + mergeAggregationsResponses } From e072d9336a8f3438c2cfaa94e98bfea711831bc3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:37:30 -0500 Subject: [PATCH 12/36] Factor out body for aggregation --- lib/elasticsearch/elastic-body-builder.js | 20 +++++++++++++++++++- lib/resources.js | 18 ++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 42a69ae7..82688400 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -433,6 +433,23 @@ const aggregationQueriesForParams = (params) => { ) } +const bodyForAggregation = (params) => { + const body = buildElasticBody(params) + + // We're fetching aggs, so specify 0 resource results: + body.size = 0 + + body.aggregations = {} + body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] + + // If it's a terms agg, we can apply per_page: + if (body.aggregations[params.field].terms) { + body.aggregations[params.field].terms.size = params.per_page + } + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -444,5 +461,6 @@ module.exports = { buildElasticBody, bodyForSearch, buildElasticAggregationsBody, - aggregationQueriesForParams + aggregationQueriesForParams, + bodyForAggregation } diff --git a/lib/resources.js b/lib/resources.js index 5082fc1d..da5048da 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -34,7 +34,8 @@ const { buildElasticQuery, buildElasticBody, bodyForSearch, - aggregationQueriesForParams + aggregationQueriesForParams, + bodyForAggregation } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -244,25 +245,14 @@ module.exports = function (app, _private = null) { return Promise.reject(new Error('Invalid aggregation field')) } - const body = buildElasticBody(params) - - // We're fetching aggs, so specify 0 resource results: - body.size = 0 - - body.aggregations = {} - body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] - - // If it's a terms agg, we can apply per_page: - if (body.aggregations[params.field].terms) { - body.aggregations[params.field].terms.size = params.per_page - } - const serializationOpts = Object.assign(opts, { // This tells the serializer what fields are "packed" fields, which should be split apart packed_fields: ['materialType', 'language', 'carrierType', 'mediaType', 'issuance', 'status', 'owner'], root: true }) + const body = bodyForAggregation(params) + app.logger.debug('Resources#aggregation:', body) return app.esClient.search(body) .then((resp) => { From 054d1e4eac144ec81d0436e625a5b5a1caad9caf Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:58:27 -0500 Subject: [PATCH 13/36] Move findByUri to async/await --- lib/resources.js | 61 ++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index da5048da..61584191 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -72,39 +72,34 @@ module.exports = function (app, _private = null) { // We only need to retrieve scsb statuses if building item aggs or // filtering on status: const retrieveScsbStatuses = params.include_item_aggregations || params.item_status - const scsbStatusLookup = retrieveScsbStatuses - ? scsbClient.getBarcodesByStatusForBnum(params.uri) - .catch((e) => { - app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) - return {} - }) - : Promise.resolve({}) - - return scsbStatusLookup - .then(async (recapBarcodesByStatus) => { - const body = await bodyForFindByUri(recapBarcodesByStatus, params) - app.logger.debug('Resources#findByUri', body) - return app.esClient.search(body) - .then((resp) => { - // Mindfully throw errors for known issues: - if (!resp || !resp.hits) { - throw new Error('Error connecting to index') - } else if (resp?.hits?.total?.value === 0) { - throw new errors.NotFoundError(`Record not found: ${params.uri}`) - } else { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) - } - }).then((resp) => { - const hitsAndItemAggregations = resp.hits.hits[0]._source - hitsAndItemAggregations.itemAggregations = resp.aggregations - return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) - }) - }) + let recapBarcodesByStatus = {} + if (retrieveScsbStatuses) { + try { + recapBarcodesByStatus = await scsbClient.getBarcodesByStatusForBnum(params.uri) + } catch (e) { + app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) + } + } + + const body = await bodyForFindByUri(recapBarcodesByStatus, params) + app.logger.debug('Resources#findByUri', body) + let resp = await app.esClient.search(body) + // Mindfully throw errors for known issues: + if (!resp || !resp.hits) { + throw new Error('Error connecting to index') + } else if (resp?.hits?.total?.value === 0) { + throw new errors.NotFoundError(`Record not found: ${params.uri}`) + } else { + const massagedResponse = new ResponseMassager(resp) + try { + resp = await massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) + } catch (e) { + // If error hitting HTC, just return response un-modified: + } + const hitsAndItemAggregations = resp.hits.hits[0]._source + hitsAndItemAggregations.itemAggregations = resp.aggregations + return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) + } } // Get a single raw annotated-marc resource: From 2d60b153315198d9dd2627e603332789c260d051 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 16:01:04 -0500 Subject: [PATCH 14/36] Make annotatedMarc async --- lib/resources.js | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 61584191..1a4a0f4d 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -110,16 +110,13 @@ module.exports = function (app, _private = null) { app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) - return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) - .then((resp) => { - // need to check that the query actually found an entry - if (!resp.data) { - throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) - } else { - return resp.data - } - }) - .then(AnnotatedMarcSerializer.serialize) + const resp = await makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) + // need to check that the query actually found an entry + if (!resp.data) { + throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) + } + + return await AnnotatedMarcSerializer.serialize(resp.data) } // Get deliveryLocations for given resource(s) From 2ec818a28c77159d25a9e6c2876dae4bd4abad9f Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 16:17:31 -0500 Subject: [PATCH 15/36] Make deliveryLocationsByBarcode async --- lib/resources.js | 46 +++++++++++++++-------------------- lib/utils/resource-helpers.js | 13 +++++++++- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 1a4a0f4d..35f5fda1 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -11,7 +11,6 @@ const { IndexSearchError, IndexConnectionError } = require('./errors') const ResponseMassager = require('./response_massager.js') const DeliveryLocationsResolver = require('./delivery-locations-resolver') -const AvailableDeliveryLocationTypes = require('./available_delivery_location_types') const { parseParams } = require('../lib/util') @@ -23,7 +22,8 @@ const { parseSearchParams, nyplSourceAndId, itemsByFilter, - mergeAggregationsResponses + mergeAggregationsResponses, + lookupPatronType } = require('./utils/resource-helpers') const { @@ -120,7 +120,7 @@ module.exports = function (app, _private = null) { } // Get deliveryLocations for given resource(s) - app.resources.deliveryLocationsByBarcode = function (params, opts) { + app.resources.deliveryLocationsByBarcode = async function (params, opts) { params = parseParams(params, { barcodes: { type: 'string', repeatable: true }, patronId: { type: 'string' } @@ -129,33 +129,27 @@ module.exports = function (app, _private = null) { const identifierValues = barcodes.map((barcode) => `urn:barcode:${barcode}`) - // Create promise to resolve deliveryLocationTypes by patron type: - const lookupPatronType = AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) - .catch((e) => { - throw new errors.InvalidParameterError('Invalid patronId') - }) - // Create promise to resolve items: const fetchItems = itemsByFilter(identifierValues, app) // Run both item fetch and patron fetch in parallel: - return Promise.all([fetchItems, lookupPatronType]) - .then((resp) => { - // The resolved values of Promise.all are strictly ordered based on original array of promises - const items = resp[0] - const scholarRoom = resp[1] - - // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: - return DeliveryLocationsResolver.attachDeliveryLocationsAndEddRequestability(items, scholarRoom) - .catch((e) => { - // An error here is likely an HTC API outage - // Let's return items unmodified: - // - app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) - return items - }) - }) - .then((items) => ItemResultsSerializer.serialize(items, opts)) + const [resp] = Promise.all([fetchItems, lookupPatronType]) + // The resolved values of Promise.all are strictly ordered based on original array of promises + let items = resp[0] + const scholarRoom = resp[1] + + // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: + try { + items = await DeliveryLocationsResolver.attachDeliveryLocationsAndEddRequestability(items, scholarRoom) + } catch (e) { + // An error here is likely an HTC API outage + // Let's return items unmodified: + // + app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) + return items + } + items = await ItemResultsSerializer.serialize(items, opts) + return items } // Conduct a search across resources: diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 4b587a2d..52e68b45 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -3,6 +3,7 @@ const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const errors = require('../errors') const LocationLabelUpdater = require('../location_label_updater') +const AvailableDeliveryLocationTypes = require('../available_delivery_location_types') /** * Given a range represented as an array, returns a corresponding ES range object @@ -142,10 +143,20 @@ const mergeAggregationsResponses = (responses) => { } } +// Create promise to resolve deliveryLocationTypes by patron type: +const lookupPatronType = async function (params) { + try { + await AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) + } catch (e) { + throw new errors.InvalidParameterError('Invalid patronId') + } +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter, - mergeAggregationsResponses + mergeAggregationsResponses, + lookupPatronType } From ba7fcf5a973ba89a4fc4089d1236f50a79cb63f6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:22:16 -0500 Subject: [PATCH 16/36] Pull search from promise chain in search --- lib/resources.js | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 35f5fda1..fa26faeb 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -153,7 +153,7 @@ module.exports = function (app, _private = null) { } // Conduct a search across resources: - app.resources.search = function (params, opts, request) { + app.resources.search = async function (params, opts, request) { app.logger.debug('Unparsed params: ', params) params = parseSearchParams(params) @@ -163,7 +163,19 @@ module.exports = function (app, _private = null) { app.logger.debug('Resources#search', RESOURCES_INDEX, body) - return app.esClient.search(body) + let resp + + try { + resp = await app.esClient.search(body) + } catch (e) { + // Wrap ES client errors or any downstream error + if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { + throw e // already a custom error + } + throw new IndexSearchError(`Error processing search: ${e.message || e}`) + } + + return Promise.resolve(resp) .then((resp) => { const massagedResponse = new ResponseMassager(resp) return massagedResponse.massagedResponse(request) @@ -193,13 +205,6 @@ module.exports = function (app, _private = null) { return resp }) }) - .catch((e) => { - // Wrap ES client errors or any downstream error - if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { - throw e // already a custom error - } - throw new IndexSearchError(`Error processing search: ${e.message || e}`) - }) } // Get all aggregations: From 9189c1b7df2c837ac9373ba5e7b618aa86cc2f00 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:24:34 -0500 Subject: [PATCH 17/36] Pull massaged response from promise chain in resources#search --- lib/resources.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index fa26faeb..3b8aaf06 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -175,14 +175,22 @@ module.exports = function (app, _private = null) { throw new IndexSearchError(`Error processing search: ${e.message || e}`) } + try { + const massagedResponse = new ResponseMassager(resp) + resp = await massagedResponse.massagedResponse(request) + } catch (e) { + + } + return Promise.resolve(resp) .then((resp) => { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) + // const massagedResponse = new ResponseMassager(resp) + // return massagedResponse.massagedResponse(request) + // .catch((e) => { + // // If error hitting HTC, just return response un-modified: + // return resp + // }) + return Promise.resolve(resp) .then((updatedResponse) => ResourceResultsSerializer.serialize(updatedResponse, opts)) .then((resp) => { // Build relevance report (for debugging): From 3297b630c6278a8e4fe9815c2302dcb258dce018 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:25:43 -0500 Subject: [PATCH 18/36] Pull ResourceResultsSerializer.serialize from promise chain in search --- lib/resources.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/resources.js b/lib/resources.js index 3b8aaf06..f10da605 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -182,6 +182,8 @@ module.exports = function (app, _private = null) { } + resp = await ResourceResultsSerializer.serialize(resp, opts) + return Promise.resolve(resp) .then((resp) => { // const massagedResponse = new ResponseMassager(resp) @@ -191,7 +193,7 @@ module.exports = function (app, _private = null) { // return resp // }) return Promise.resolve(resp) - .then((updatedResponse) => ResourceResultsSerializer.serialize(updatedResponse, opts)) + .then((resp) => { // Build relevance report (for debugging): const relevanceReport = resp.itemListElement From 03d57dcdb88d53a48a7e4759970f12887f4eb030 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:28:06 -0500 Subject: [PATCH 19/36] Remove nested promise --- lib/resources.js | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index f10da605..eb5c8aff 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -183,37 +183,26 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - return Promise.resolve(resp) .then((resp) => { - // const massagedResponse = new ResponseMassager(resp) - // return massagedResponse.massagedResponse(request) - // .catch((e) => { - // // If error hitting HTC, just return response un-modified: - // return resp - // }) - return Promise.resolve(resp) - - .then((resp) => { - // Build relevance report (for debugging): - const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) - app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) - - resp.debug = { - relevanceReport, - query: body - } - return resp + // Build relevance report (for debugging): + const relevanceReport = resp.itemListElement + .map((r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') }) + app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) + + resp.debug = { + relevanceReport, + query: body + } + return resp }) } From 1c344696b011abe454032e5215b2b96c63b678da Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:30:24 -0500 Subject: [PATCH 20/36] Remove promise from search --- lib/resources.js | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index eb5c8aff..6dbbb5ad 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -183,27 +183,24 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - return Promise.resolve(resp) - .then((resp) => { - // Build relevance report (for debugging): - const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) - app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) - - resp.debug = { - relevanceReport, - query: body - } - return resp + + const relevanceReport = resp.itemListElement + .map((r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') }) + app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) + + resp.debug = { + relevanceReport, + query: body + } + return resp } // Get all aggregations: From 6e8af8df660e45689bd75793121b858ab089f358 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:39:38 -0500 Subject: [PATCH 21/36] Factor out relevance report --- lib/resources.js | 15 ++++----------- lib/utils/resource-helpers.js | 13 ++++++++++++- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 6dbbb5ad..b112f13a 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -23,7 +23,8 @@ const { nyplSourceAndId, itemsByFilter, mergeAggregationsResponses, - lookupPatronType + lookupPatronType, + makeRelevanceReport } = require('./utils/resource-helpers') const { @@ -183,17 +184,9 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - + const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) + .map(makeRelevanceReport(params)) app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) resp.debug = { diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 52e68b45..9ba7ea9d 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -152,11 +152,22 @@ const lookupPatronType = async function (params) { } } +const makeRelevanceReport = (params) => (r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter, mergeAggregationsResponses, - lookupPatronType + lookupPatronType, + makeRelevanceReport } From cf7edcdc0bf90907d00c9eb2d7da4db741a4de1d Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:45:03 -0500 Subject: [PATCH 22/36] Make aggregation endpoint async --- lib/resources.js | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index b112f13a..062a0a28 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -217,7 +217,7 @@ module.exports = function (app, _private = null) { } // Get a single aggregation: - app.resources.aggregation = (params, opts) => { + app.resources.aggregation = async (params, opts) => { params = parseSearchParams(params, { per_page: { type: 'int', default: 50, range: [0, 1000] } }) @@ -234,13 +234,11 @@ module.exports = function (app, _private = null) { const body = bodyForAggregation(params) app.logger.debug('Resources#aggregation:', body) - return app.esClient.search(body) - .then((resp) => { - // If it's nested, it will be in our special '_nested' prop: - resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] - resp.id = params.field - return AggregationSerializer.serialize(resp, serializationOpts) - }) + + let resp = await app.esClient.search(body) + resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] + resp.id = params.field + return AggregationSerializer.serialize(resp, serializationOpts) } // For unit testing, export private methods if second arg given: From eefeb444be4f8e7fefc0fdf1c4648713f67b17f9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:23:56 -0500 Subject: [PATCH 23/36] Reorganize addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 300 ++---------------- lib/elasticsearch/elastic-query-builder.js | 8 +- .../elastic-query-filter-builder.js | 257 +++++++++++++++ lib/elasticsearch/elastic-query.js | 8 +- test/resources.test.js | 9 +- 5 files changed, 299 insertions(+), 283 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 82688400..f2d357f4 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,7 @@ const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') +const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic-query-filter-builder') const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') @@ -13,263 +14,15 @@ const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') * for limiting and paginating through items */ const addInnerHits = (body, _options = {}) => { - const options = Object.assign({ - size: process.env.SEARCH_ITEMS_SIZE || 200, - from: 0, - merge_checkin_card_items: true - }, _options) + const wrappedItemsQuery = innerHits(_options) - // Make sure necessary structure exists: - if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { - body.query = { bool: {} } - } - - // The place to add the filter depends on the query built to this point: - const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) - // Initialize filter object if it doesn't already exist: - placeToAddFilter.filter = placeToAddFilter.filter || [] - // If filter object already exists, convert it to array: - if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] + const placeToAddFilter = body.query.bool - const itemsQuery = { - bool: Object.assign( - itemsQueryContext(options), - itemsFilterContext(options) - ) - } - - const wrappedItemsQuery = { - bool: { - should: [ - { - nested: { - path: 'items', - query: itemsQuery, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: options.size, - from: options.from, - name: 'items' - } - } - }, - // Add a catch-all to ensure we return the bib document even when - // numItems=0 or applied item filters exclude all items: - { match_all: {} } - ] - } - } placeToAddFilter.filter.push(wrappedItemsQuery) - // If there is any item query at all, run an additional inner_hits query - // to retrieve the total number of items without filtering: - if (itemsQuery.bool.filter) { - wrappedItemsQuery.bool.should.push({ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }) - } - return body } -/** - * Given an object containing filters, - * returns content of the ES query filter context - * - * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] - * @returns {object} - */ -const itemsFilterContext = (options) => { - if (!options.query) return {} - - const filterHandlers = { - volume: (volumes) => { - return { - range: { - 'items.volumeRange': esRangeValue(volumes) - } - } - }, - date: (dates) => { - return { - range: { - 'items.dateRange': esRangeValue(dates) - } - } - }, - format: (formats) => { - return { - terms: { - 'items.formatLiteral': formats - } - } - }, - location: (locations) => { - return { - terms: { - 'items.holdingLocation.id': locations - } - } - }, - status: (statuses) => { - // Determine if all possible ReCAP statuses were selected: - const selectedRecapStatuses = recapStatuses(statuses) - - if (selectedRecapStatuses.length === 1 && - Array.isArray(options.unavailable_recap_barcodes) && - options.unavailable_recap_barcodes.length > 0) { - // There are known unavailable ReCAP items, so build a complicated - // filter clause with appropriate barcode overrides: - return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) - } else { - // If there are no known unavailable ReCAP items, just do a straight - // status match: - return { - terms: { - 'items.status.id': statuses - } - } - } - }, - itemUri: (uri) => { - return { term: { 'items.uri': uri } } - } - } - - const filters = Object.keys(options.query).map((filter) => { - const value = options.query[filter] - const handler = filterHandlers[filter] - return value && handler ? handler(value) : null - }).filter((x) => x) - - return filters.length - ? { filter: filters } - : {} -} - -/** - * Given an array of status ids (e.g. "status:a", "status:na") returns the - * subset of statuses that are relevant in ReCAP - */ -const recapStatuses = (statuses) => { - return statuses - .filter((status) => ['status:a', 'status:na'].includes(status)) -} - -/** - * Builds a big complicated ES filter to allow us to filter items by status, - * but override the indexed status for ReCAP items with statuses retrieved - * from SCSB. This corrects for the fact that ReCAP item statuses tend to be - * wrong in the ES index: - * - partner items are indexed as Available and remain thus forever - * - NYPL item statuses _should_ equal SCSB status, but the mechanism - * for keeping them synced isn't perfect and operates on a delay - * - * @param {string[]} statuses - An array of statuses to filter on - * @param {string[]} unavailableRecapBarcodes - An array of item barcodes - * known to be unavailble - * - * Returns an ES filter that matches the desired statuses, but also uses - * the known unavailable items to override indexed item statuses for ReCAP - * items (because ReCAP is the authority for status of off-site items). - * Essentially, the criteria is for matching an item is: - * - * - if on-site (non-ReCAP): - * - has a matching indexed status - * - if off-site: - * - if filtering on status:na - * - item barcode must be in unavailableRecapBarcodes - * - if filtering on status:a: - * - item barcode must NOT be in unavailableRecapBarcodes - */ -const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { - // First, let's set up some common clauses: - - // Item is in ReCAP: - const itemIsRecapClause = { - regexp: { 'items.holdingLocation.id': 'loc:rc.*' } - } - // Item's indexed status matches one of the filtered statuses: - const itemHasIndexedStatusClause = { - terms: { 'items.status.id': statuses } - } - // Item is marked Unavailable in SCSB: - const itemIsUnavailableInRecapClause = { - script: { - script: { - inline: 'doc[\'items.idBarcode\'].value == null || ' + - 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', - lang: 'painless', - params: { unavailableRecapBarcodes } - } - } - } - // This function is only called if `statuses` param contains a single - // ReCAP-relevant status (i.e. status:a or status:na), so determine which - // ReCAP status to use: - const selectedRecapStatus = recapStatuses(statuses).shift() - // Item's ReCAP status agrees with filter: - const itemRecapStatusAgreesWithFilterClause = - selectedRecapStatus === 'status:na' - ? itemIsUnavailableInRecapClause - : { bool: { must_not: itemIsUnavailableInRecapClause } } - - return { - bool: { - should: [ - // Either 1) item is on-site and has correctly indexed status: - { - bool: { - must: [ - // Item is on-site (i.e. not recap): - { bool: { must_not: itemIsRecapClause } }, - // Item indexed status matches filter: - itemHasIndexedStatusClause - ] - } - }, - // Or 2) item is off-site and has a scsb status that agrees with the - // filter (e.g. if filtering on status:na, scsb marks the barcode as - // 'Not Available') - { - bool: { - must: [ - // Item is off-site: - JSON.parse(JSON.stringify(itemIsRecapClause)), - // Item is not marked unavailable - itemRecapStatusAgreesWithFilterClause - ] - } - } - ] - } - } -} - -/** - * Given an object containing query options, - * returns content of the ES query context - * - * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one - * that matters right now - * @returns {object} - */ -const itemsQueryContext = (options) => { - const excludeClauses = [] - - if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) - - return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } -} - const bodyForFindByUri = async function (recapBarcodesByStatus, params) { // Establish base query: let body = { @@ -285,7 +38,8 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { uri: params.uri } } - ] + ], + filter: [] } } } @@ -310,6 +64,7 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { }, unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } + body.filter = [] body = addInnerHits(body, itemsOptions) body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -329,10 +84,10 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { * * @return {object} ES query object suitable to be POST'd to ES endpoint */ -const buildElasticQuery = function (params) { +const buildElasticQuery = function (params, options = {}) { const request = ApiRequest.fromParams(params) - const builder = ElasticQueryBuilder.forApiRequest(request) + const builder = ElasticQueryBuilder.forApiRequest(request, options) return builder.query.toJson() } @@ -343,13 +98,13 @@ const buildElasticQuery = function (params) { * * @return {object} An object that can be posted directly to ES */ -const buildElasticBody = function (params) { +const buildElasticBody = function (params, options = {}) { const body = { from: (params.per_page * (params.page - 1)), size: params.per_page } - body.query = buildElasticQuery(params) + body.query = buildElasticQuery(params, options) // Apply sort: let direction @@ -368,7 +123,7 @@ const buildElasticBody = function (params) { } const bodyForSearch = function (params) { - let body = buildElasticBody(params) + let body = buildElasticBody(params, { items: true }) // Strip unnecessary _source fields body._source = { @@ -393,11 +148,10 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return aggs }, {}) - const body = buildElasticBody(params) - body.size = 0 - body.aggregations = aggregations - - return body + return Object.assign( + buildElasticBody(params), + { size: 0, aggregations: aggregations } + ) } /** @@ -434,19 +188,23 @@ const aggregationQueriesForParams = (params) => { } const bodyForAggregation = (params) => { - const body = buildElasticBody(params) - - // We're fetching aggs, so specify 0 resource results: - body.size = 0 - - body.aggregations = {} - body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] + const aggregations = {} + aggregations[params.field] = AGGREGATIONS_SPEC[params.field] // If it's a terms agg, we can apply per_page: - if (body.aggregations[params.field].terms) { - body.aggregations[params.field].terms.size = params.per_page + if (aggregations[params.field].terms) { + aggregations[params.field].terms.size = params.per_page } + const body = Object.assign( + buildElasticBody(params), + { + size: 0, + aggregations: aggregations + } + ) + + return body } @@ -454,8 +212,6 @@ module.exports = { bodyForFindByUri, addInnerHits, itemsFilterContext, - recapStatuses, - itemStatusFilterWithUnavailableRecapItems, itemsQueryContext, buildElasticQuery, buildElasticBody, diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index d561795a..97fe7018 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -11,9 +11,9 @@ const POPULARITY_BOOSTS = [ ] class ElasticQueryBuilder { - constructor (apiRequest) { + constructor (apiRequest, options = {}) { this.request = apiRequest - this.query = new ElasticQuery() + this.query = new ElasticQuery(options) // Break on search_scope: switch (this.request.params.search_scope) { @@ -706,8 +706,8 @@ class ElasticQueryBuilder { /** * Create a ElasticQueryBuilder for given ApiRequest instance */ - static forApiRequest (request) { - return new ElasticQueryBuilder(request) + static forApiRequest (request, options = {}) { + return new ElasticQueryBuilder(request, options) } } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index e69de29b..a7d3e0bb 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -0,0 +1,257 @@ +const { esRangeValue } = require('../utils/resource-helpers') + +/** + * Given an object containing filters, + * returns content of the ES query filter context + * + * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] + * @returns {object} + */ +const itemsFilterContext = (options) => { + if (!options.query) return {} + + const filterHandlers = { + volume: (volumes) => { + return { + range: { + 'items.volumeRange': esRangeValue(volumes) + } + } + }, + date: (dates) => { + return { + range: { + 'items.dateRange': esRangeValue(dates) + } + } + }, + format: (formats) => { + return { + terms: { + 'items.formatLiteral': formats + } + } + }, + location: (locations) => { + return { + terms: { + 'items.holdingLocation.id': locations + } + } + }, + status: (statuses) => { + // Determine if all possible ReCAP statuses were selected: + const selectedRecapStatuses = recapStatuses(statuses) + + if (selectedRecapStatuses.length === 1 && + Array.isArray(options.unavailable_recap_barcodes) && + options.unavailable_recap_barcodes.length > 0) { + // There are known unavailable ReCAP items, so build a complicated + // filter clause with appropriate barcode overrides: + return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) + } else { + // If there are no known unavailable ReCAP items, just do a straight + // status match: + return { + terms: { + 'items.status.id': statuses + } + } + } + }, + itemUri: (uri) => { + return { term: { 'items.uri': uri } } + } + } + + const filters = Object.keys(options.query).map((filter) => { + const value = options.query[filter] + const handler = filterHandlers[filter] + return value && handler ? handler(value) : null + }).filter((x) => x) + + return filters.length + ? { filter: filters } + : {} +} + +/** + * Given an array of status ids (e.g. "status:a", "status:na") returns the + * subset of statuses that are relevant in ReCAP + */ +const recapStatuses = (statuses) => { + return statuses + .filter((status) => ['status:a', 'status:na'].includes(status)) +} + +/** + * Builds a big complicated ES filter to allow us to filter items by status, + * but override the indexed status for ReCAP items with statuses retrieved + * from SCSB. This corrects for the fact that ReCAP item statuses tend to be + * wrong in the ES index: + * - partner items are indexed as Available and remain thus forever + * - NYPL item statuses _should_ equal SCSB status, but the mechanism + * for keeping them synced isn't perfect and operates on a delay + * + * @param {string[]} statuses - An array of statuses to filter on + * @param {string[]} unavailableRecapBarcodes - An array of item barcodes + * known to be unavailble + * + * Returns an ES filter that matches the desired statuses, but also uses + * the known unavailable items to override indexed item statuses for ReCAP + * items (because ReCAP is the authority for status of off-site items). + * Essentially, the criteria is for matching an item is: + * + * - if on-site (non-ReCAP): + * - has a matching indexed status + * - if off-site: + * - if filtering on status:na + * - item barcode must be in unavailableRecapBarcodes + * - if filtering on status:a: + * - item barcode must NOT be in unavailableRecapBarcodes + */ +const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { + // First, let's set up some common clauses: + + // Item is in ReCAP: + const itemIsRecapClause = { + regexp: { 'items.holdingLocation.id': 'loc:rc.*' } + } + // Item's indexed status matches one of the filtered statuses: + const itemHasIndexedStatusClause = { + terms: { 'items.status.id': statuses } + } + // Item is marked Unavailable in SCSB: + const itemIsUnavailableInRecapClause = { + script: { + script: { + inline: 'doc[\'items.idBarcode\'].value == null || ' + + 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', + lang: 'painless', + params: { unavailableRecapBarcodes } + } + } + } + // This function is only called if `statuses` param contains a single + // ReCAP-relevant status (i.e. status:a or status:na), so determine which + // ReCAP status to use: + const selectedRecapStatus = recapStatuses(statuses).shift() + // Item's ReCAP status agrees with filter: + const itemRecapStatusAgreesWithFilterClause = + selectedRecapStatus === 'status:na' + ? itemIsUnavailableInRecapClause + : { bool: { must_not: itemIsUnavailableInRecapClause } } + + return { + bool: { + should: [ + // Either 1) item is on-site and has correctly indexed status: + { + bool: { + must: [ + // Item is on-site (i.e. not recap): + { bool: { must_not: itemIsRecapClause } }, + // Item indexed status matches filter: + itemHasIndexedStatusClause + ] + } + }, + // Or 2) item is off-site and has a scsb status that agrees with the + // filter (e.g. if filtering on status:na, scsb marks the barcode as + // 'Not Available') + { + bool: { + must: [ + // Item is off-site: + JSON.parse(JSON.stringify(itemIsRecapClause)), + // Item is not marked unavailable + itemRecapStatusAgreesWithFilterClause + ] + } + } + ] + } + } +} + + +/** + * Given an object containing query options, + * returns content of the ES query context + * + * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one + * that matters right now + * @returns {object} + */ +const itemsQueryContext = (options) => { + const excludeClauses = [] + + if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) + + return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } +} + +const innerHits = (_options = {}) => { + const options = Object.assign({ + size: process.env.SEARCH_ITEMS_SIZE || 200, + from: 0, + merge_checkin_card_items: true + }, _options) + + // const placeToAddFilter = body.query.bool + + + // If there is any item query at all, run an additional inner_hits query + // to retrieve the total number of items without filtering: + const itemsQuery = { + bool: Object.assign( + itemsQueryContext(options), + itemsFilterContext(options) + ) + } + + const allItemsQuery = itemsQuery.bool.filter ? + [{ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }] : + [] + + const wrappedItemsQuery = { + bool: { + should: [ + { + nested: { + path: 'items', + query: itemsQuery, + inner_hits: { + sort: [{ 'items.enumerationChronology_sort': 'desc' }], + size: options.size, + from: options.from, + name: 'items' + } + } + }, + // Add a catch-all to ensure we return the bib document even when + // numItems=0 or applied item filters exclude all items: + { match_all: {} }, + ...allItemsQuery + ] + } + } + + return wrappedItemsQuery +} + +module.exports = { + innerHits, + itemsQueryContext, + itemsFilterContext +} diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index aa2d2f1e..82edb0f6 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -7,10 +7,11 @@ **/ class ElasticQuery { - constructor () { + constructor (options = {}) { this.musts = [] this.shoulds = [] this.filters = [] + this.options = options } addMust (clause) { @@ -42,7 +43,8 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - if (!this.musts.length && !this.shoulds.length && !this.filters.length) { + console.log('options: ', this.options) + if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { return { match_all: {} } @@ -56,7 +58,7 @@ class ElasticQuery { if (this.shoulds.length) { result.bool.should = this.shoulds } - if (this.filters.length) { + if (this.filters.length || this.options.items) { result.bool.filter = this.filters } diff --git a/test/resources.test.js b/test/resources.test.js index 056a2926..c91b5d37 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -466,7 +466,8 @@ describe('Resources query', function () { size: 1, query: { bool: { - must: [{ term: { uri: 'b1234' } }] + must: [{ term: { uri: 'b1234' } }], + filter: [] } }, aggregations: { @@ -724,7 +725,7 @@ describe('Resources query', function () { describe('addInnerHits', () => { it('should include query for items', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2 })) + expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2 })) .to.deep.equal({ query: { bool: { @@ -761,7 +762,7 @@ describe('Resources query', function () { }) it('should exclude check in card items if explicitly set', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2, merge_checkin_card_items: false })) + expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2, merge_checkin_card_items: false })) .to.deep.equal({ query: { bool: { @@ -799,7 +800,7 @@ describe('Resources query', function () { it('should include filters for items', () => { expect(resourcesPrivMethods.addInnerHits( - { query: { bool: {} } }, + { query: { bool: { filter: [] } } }, { size: 1, from: 2, query: { volume: [1, 2], location: ['SASB', 'LPA'], other: 'filter' } } )).to.deep.equal({ query: { From d26553b4749c31ae07a24cf7df60f0e3cbc2f6bb Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:43:32 -0500 Subject: [PATCH 24/36] Reorganize bodyForFindByUri except for innerHits --- lib/elasticsearch/elastic-body-builder.js | 63 +++++++++++++---------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f2d357f4..5f236f4b 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -24,10 +24,37 @@ const addInnerHits = (body, _options = {}) => { } const bodyForFindByUri = async function (recapBarcodesByStatus, params) { + const paramsIncludesItemLevelFiltering = Object.keys(params) + .filter((param) => param.startsWith('item_')).length > 0 + + const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + + const excludes = returnAllItems ? EXCLUDE_FIELDS.filter((field) => field !== '*_sort') : EXCLUDE_FIELDS.concat(['items']) + + + const aggregations = params.include_item_aggregations ? + { aggregations: ITEM_FILTER_AGGREGATIONS } : + {} + + const itemsOptions = { + size: params.items_size, + from: params.items_from, + merge_checkin_card_items: params.merge_checkin_card_items, + query: { + volume: params.item_volume, + date: params.item_date, + format: params.item_format, + location: params.item_location, + status: params.item_status, + itemUri: params.itemUri + }, + unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] + } + // Establish base query: let body = { _source: { - excludes: EXCLUDE_FIELDS + excludes: excludes }, size: 1, query: { @@ -41,38 +68,22 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { ], filter: [] } - } + }, + ...aggregations } - const paramsIncludesItemLevelFiltering = Object.keys(params) - .filter((param) => param.startsWith('item_')).length > 0 - const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + if (returnAllItems) { - body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') + // body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') } else { // No specific item requested, so add pagination and matching params: - const itemsOptions = { - size: params.items_size, - from: params.items_from, - merge_checkin_card_items: params.merge_checkin_card_items, - query: { - volume: params.item_volume, - date: params.item_date, - format: params.item_format, - location: params.item_location, - status: params.item_status, - itemUri: params.itemUri - }, - unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] - } + body.filter = [] body = addInnerHits(body, itemsOptions) - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - } - if (params.include_item_aggregations) { - body.aggregations = ITEM_FILTER_AGGREGATIONS + // body._source = { + // excludes: EXCLUDE_FIELDS.concat(['items']) + // } } + return body } From 7429ad6ed44fcf83ce1e18a644d4ad08aa3f3bb0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:54:31 -0500 Subject: [PATCH 25/36] Remove addInnerHits from findByUri --- lib/elasticsearch/elastic-body-builder.js | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 5f236f4b..077ae8f2 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -51,6 +51,10 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } + const filter = returnAllItems ? {} : { filter: []} + + const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } + // Establish base query: let body = { _source: { @@ -66,24 +70,13 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { } } ], - filter: [] + ...queryFilter } }, + ...filter, ...aggregations } - if (returnAllItems) { - // body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') - } else { - // No specific item requested, so add pagination and matching params: - - body.filter = [] - body = addInnerHits(body, itemsOptions) - // body._source = { - // excludes: EXCLUDE_FIELDS.concat(['items']) - // } - } - return body } From b386ec1dc17c68b6d808a8df6af68126ef39bbc0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 16:11:16 -0500 Subject: [PATCH 26/36] Reorganize buildElasticBody --- lib/elasticsearch/elastic-body-builder.js | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 077ae8f2..f285f0f2 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -103,13 +103,6 @@ const buildElasticQuery = function (params, options = {}) { * @return {object} An object that can be posted directly to ES */ const buildElasticBody = function (params, options = {}) { - const body = { - from: (params.per_page * (params.page - 1)), - size: params.per_page - } - - body.query = buildElasticQuery(params, options) - // Apply sort: let direction let field @@ -121,9 +114,13 @@ const buildElasticBody = function (params, options = {}) { field = SORT_FIELDS[params.sort].field || params.sort direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection } - body.sort = [{ [field]: direction }, { uri: 'asc' }] - return body + return { + from: (params.per_page * (params.page - 1)), + size: params.per_page, + query: buildElasticQuery(params, options), + sort: [{ [field]: direction }, { uri: 'asc' }] + } } const bodyForSearch = function (params) { @@ -134,9 +131,7 @@ const bodyForSearch = function (params) { excludes: EXCLUDE_FIELDS.concat(['items']) } - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) - - return body + return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } const buildElasticAggregationsBody = (params, aggregateProps) => { @@ -200,16 +195,13 @@ const bodyForAggregation = (params) => { aggregations[params.field].terms.size = params.per_page } - const body = Object.assign( + return Object.assign( buildElasticBody(params), { size: 0, aggregations: aggregations } ) - - - return body } module.exports = { From 36d56c55864a1baaaf2d24860f9a3afbe2fb0161 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 13 Jan 2026 10:56:19 -0500 Subject: [PATCH 27/36] Fix linting --- lib/elasticsearch/elastic-body-builder.js | 21 ++++++------- .../elastic-query-filter-builder.js | 30 ++++++++----------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f285f0f2..e12eb9b1 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,4 @@ const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') -const { deepValue } = require('../util') -const { esRangeValue } = require('../utils/resource-helpers') const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic-query-filter-builder') const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') @@ -31,10 +29,9 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { const excludes = returnAllItems ? EXCLUDE_FIELDS.filter((field) => field !== '*_sort') : EXCLUDE_FIELDS.concat(['items']) - - const aggregations = params.include_item_aggregations ? - { aggregations: ITEM_FILTER_AGGREGATIONS } : - {} + const aggregations = params.include_item_aggregations + ? { aggregations: ITEM_FILTER_AGGREGATIONS } + : {} const itemsOptions = { size: params.items_size, @@ -51,14 +48,14 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - const filter = returnAllItems ? {} : { filter: []} + const filter = returnAllItems ? {} : { filter: [] } const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } // Establish base query: - let body = { + const body = { _source: { - excludes: excludes + excludes }, size: 1, query: { @@ -124,7 +121,7 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { - let body = buildElasticBody(params, { items: true }) + const body = buildElasticBody(params, { items: true }) // Strip unnecessary _source fields body._source = { @@ -149,7 +146,7 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return Object.assign( buildElasticBody(params), - { size: 0, aggregations: aggregations } + { size: 0, aggregations } ) } @@ -199,7 +196,7 @@ const bodyForAggregation = (params) => { buildElasticBody(params), { size: 0, - aggregations: aggregations + aggregations } ) } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index a7d3e0bb..5149cde8 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -174,7 +174,6 @@ const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBar } } - /** * Given an object containing query options, * returns content of the ES query context @@ -198,9 +197,6 @@ const innerHits = (_options = {}) => { merge_checkin_card_items: true }, _options) - // const placeToAddFilter = body.query.bool - - // If there is any item query at all, run an additional inner_hits query // to retrieve the total number of items without filtering: const itemsQuery = { @@ -210,19 +206,19 @@ const innerHits = (_options = {}) => { ) } - const allItemsQuery = itemsQuery.bool.filter ? - [{ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }] : - [] + const allItemsQuery = itemsQuery.bool.filter + ? [{ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }] + : [] const wrappedItemsQuery = { bool: { From d8249acf0ee9a514af7b63ccb18354365c64f813 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 13 Jan 2026 11:20:18 -0500 Subject: [PATCH 28/36] Remove adding source to body in body for search --- lib/elasticsearch/elastic-body-builder.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e12eb9b1..924b72c8 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -121,12 +121,14 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { - const body = buildElasticBody(params, { items: true }) - - // Strip unnecessary _source fields - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } + const body = Object.assign( + buildElasticBody(params, { items: true }), + { + _source: { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + } + ) return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } From 9353887cce2f03393712f44470fecc488192dffc Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 15 Jan 2026 14:50:29 -0500 Subject: [PATCH 29/36] Add innerHits options --- lib/elasticsearch/elastic-body-builder.js | 3 ++- lib/elasticsearch/elastic-query-builder.js | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 924b72c8..f4b9f9af 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -121,8 +121,9 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { + const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } const body = Object.assign( - buildElasticBody(params, { items: true }), + buildElasticBody(params, { items: itemsOptions }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 97fe7018..f65dbd74 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -2,6 +2,7 @@ const ElasticQuery = require('./elastic-query') const ApiRequest = require('../api-request') const { escapeQuery, namedQuery, prefixMatch, termMatch, phraseMatch } = require('./utils') const { regexEscape } = require('../util') +const { innerHits } = require('./elastic-query-filter-builder') const { FILTER_CONFIG, SEARCH_SCOPES } = require('./config') @@ -41,6 +42,10 @@ class ElasticQueryBuilder { this.buildAllQuery() } + if (options.items) { + this.query.addFilter(innerHits(options.items)) + } + // Add user filters: this.applyFilters() From 15a5e5ca29e40747876b751ff14f54e2d757252c Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 15 Jan 2026 16:19:09 -0500 Subject: [PATCH 30/36] Remove dependence on addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 8 +++++--- lib/elasticsearch/elastic-query-builder.js | 8 ++++---- lib/elasticsearch/elastic-query-filter-builder.js | 1 + lib/elasticsearch/elastic-query.js | 1 - 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f4b9f9af..28bd58c7 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -122,8 +122,9 @@ const buildElasticBody = function (params, options = {}) { const bodyForSearch = function (params) { const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } - const body = Object.assign( - buildElasticBody(params, { items: itemsOptions }), + + const otherBody = Object.assign( + buildElasticBody(params, { items: itemsOptions, test: true }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -131,7 +132,8 @@ const bodyForSearch = function (params) { } ) - return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + // return withInnerHits + return otherBody } const buildElasticAggregationsBody = (params, aggregateProps) => { diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index f65dbd74..03602596 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -42,13 +42,13 @@ class ElasticQueryBuilder { this.buildAllQuery() } - if (options.items) { - this.query.addFilter(innerHits(options.items)) - } - // Add user filters: this.applyFilters() + if (options.items && options.test) { + this.query.addFilter(innerHits(options.items)) + } + // Apply global clauses: // Hide specific nypl-sources when configured to do so: this.applyHiddenNyplSources() diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index 5149cde8..4eba4ae6 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -191,6 +191,7 @@ const itemsQueryContext = (options) => { } const innerHits = (_options = {}) => { + console.log('innerHits for: ', _options) const options = Object.assign({ size: process.env.SEARCH_ITEMS_SIZE || 200, from: 0, diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index 82edb0f6..68dfd30f 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -43,7 +43,6 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - console.log('options: ', this.options) if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { return { match_all: {} From b5530ebe6cb91b1c73b5814ccd6cff393f2b613e Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 14:20:10 -0500 Subject: [PATCH 31/36] Add tests for bodyForSearch; remove tests for addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 32 +--- lib/elasticsearch/elastic-query-builder.js | 2 +- .../elastic-query-filter-builder.js | 1 - test/elastic-body-builder.test.js | 145 ++++++++++++++++++ test/resources.test.js | 132 ---------------- 5 files changed, 154 insertions(+), 158 deletions(-) create mode 100644 test/elastic-body-builder.test.js diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 28bd58c7..9a5f682a 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -3,24 +3,6 @@ const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic- const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') -/** - * Given a ES search body, returns same object modified to include the - * additional query necessary to limit (and paginate through) items - * - * @param {object} body - An ES query object (suitable for POSTing to ES - * @param {object} options - An object optionally defining `size` and `from` - * for limiting and paginating through items - */ -const addInnerHits = (body, _options = {}) => { - const wrappedItemsQuery = innerHits(_options) - - const placeToAddFilter = body.query.bool - - placeToAddFilter.filter.push(wrappedItemsQuery) - - return body -} - const bodyForFindByUri = async function (recapBarcodesByStatus, params) { const paramsIncludesItemLevelFiltering = Object.keys(params) .filter((param) => param.startsWith('item_')).length > 0 @@ -112,9 +94,12 @@ const buildElasticBody = function (params, options = {}) { direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection } + const from = params.per_page && params.page ? { from: params.per_page * (params.page - 1) } : {} + const size = params.per_page ? { size: params.per_page } : {} + return { - from: (params.per_page * (params.page - 1)), - size: params.per_page, + ...from, + ...size, query: buildElasticQuery(params, options), sort: [{ [field]: direction }, { uri: 'asc' }] } @@ -123,8 +108,8 @@ const buildElasticBody = function (params, options = {}) { const bodyForSearch = function (params) { const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } - const otherBody = Object.assign( - buildElasticBody(params, { items: itemsOptions, test: true }), + const body = Object.assign( + buildElasticBody(params, { items: itemsOptions }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -133,7 +118,7 @@ const bodyForSearch = function (params) { ) // return withInnerHits - return otherBody + return body } const buildElasticAggregationsBody = (params, aggregateProps) => { @@ -208,7 +193,6 @@ const bodyForAggregation = (params) => { module.exports = { bodyForFindByUri, - addInnerHits, itemsFilterContext, itemsQueryContext, buildElasticQuery, diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 03602596..de87a263 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -45,7 +45,7 @@ class ElasticQueryBuilder { // Add user filters: this.applyFilters() - if (options.items && options.test) { + if (options.items) { this.query.addFilter(innerHits(options.items)) } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index 4eba4ae6..5149cde8 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -191,7 +191,6 @@ const itemsQueryContext = (options) => { } const innerHits = (_options = {}) => { - console.log('innerHits for: ', _options) const options = Object.assign({ size: process.env.SEARCH_ITEMS_SIZE || 200, from: 0, diff --git a/test/elastic-body-builder.test.js b/test/elastic-body-builder.test.js new file mode 100644 index 00000000..97986858 --- /dev/null +++ b/test/elastic-body-builder.test.js @@ -0,0 +1,145 @@ +const { expect } = require('chai') + +const { bodyForSearch, bodyForFindByUri } = require('../lib/elasticsearch/elastic-body-builder') + +describe('bodyForSearch', function () { + it('excludes checkin cards by default', function () { + expect(bodyForSearch({ sort: 'relevance' })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must_not: [ + { + term: { + 'items.type': 'nypl:CheckinCardItem' + } + } + ] + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: [ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) + + it('includes checkin cards when present in params', function () { + expect(bodyForSearch({ sort: 'relevance', merge_checkin_card_items: true })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: [ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) +}) + +describe('bodyForFindByUri', async function () { + bodyForFindByUri() +}) diff --git a/test/resources.test.js b/test/resources.test.js index c91b5d37..98220b26 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -723,138 +723,6 @@ describe('Resources query', function () { }) }) - describe('addInnerHits', () => { - it('should include query for items', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2 })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - } - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should exclude check in card items if explicitly set', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2, merge_checkin_card_items: false })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must_not: [ - { term: { 'items.type': 'nypl:CheckinCardItem' } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should include filters for items', () => { - expect(resourcesPrivMethods.addInnerHits( - { query: { bool: { filter: [] } } }, - { size: 1, from: 2, query: { volume: [1, 2], location: ['SASB', 'LPA'], other: 'filter' } } - )).to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - }, - filter: [ - { range: { 'items.volumeRange': { gte: 1, lte: 2 } } }, - { terms: { 'items.holdingLocation.id': ['SASB', 'LPA'] } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} }, - { - nested: { - inner_hits: { name: 'allItems' }, - path: 'items', - query: { - bool: { - must_not: [ - { exists: { field: 'items.electronicLocator' } } - ] - } - } - } - } - ] - } - } - ] - } - } - }) - }) - }) - describe('search exception handling', () => { describe('lexical error', () => { before(() => { From 6cfbd49e9ac36c87aae2d440cef660dfa2e32399 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 14:20:50 -0500 Subject: [PATCH 32/36] Remove remaining references to addInnerHits --- lib/resources.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 062a0a28..87a41b45 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -29,7 +29,6 @@ const { const { bodyForFindByUri, - addInnerHits, itemsFilterContext, itemsQueryContext, buildElasticQuery, @@ -249,7 +248,6 @@ module.exports = function (app, _private = null) { _private.esRangeValue = esRangeValue _private.itemsFilterContext = itemsFilterContext _private.itemsQueryContext = itemsQueryContext - _private.addInnerHits = addInnerHits _private.aggregationQueriesForParams = aggregationQueriesForParams _private.mergeAggregationsResponses = mergeAggregationsResponses } From 1c769c9cddef35a85f02e8ac48f3658677eac8d5 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 15:36:19 -0500 Subject: [PATCH 33/36] Add bodybuilder tests --- lib/elasticsearch/elastic-body-builder.js | 5 +- test/elastic-body-builder.test.js | 235 +++++++++++++++++++++- 2 files changed, 235 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 9a5f682a..110917b1 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -3,7 +3,7 @@ const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic- const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') -const bodyForFindByUri = async function (recapBarcodesByStatus, params) { +const bodyForFindByUri = function (recapBarcodesByStatus, params) { const paramsIncludesItemLevelFiltering = Object.keys(params) .filter((param) => param.startsWith('item_')).length > 0 @@ -30,7 +30,7 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - const filter = returnAllItems ? {} : { filter: [] } + // const filter = returnAllItems ? {} : { filter: [] } const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } @@ -52,7 +52,6 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { ...queryFilter } }, - ...filter, ...aggregations } diff --git a/test/elastic-body-builder.test.js b/test/elastic-body-builder.test.js index 97986858..ccec82dd 100644 --- a/test/elastic-body-builder.test.js +++ b/test/elastic-body-builder.test.js @@ -140,6 +140,237 @@ describe('bodyForSearch', function () { }) }) -describe('bodyForFindByUri', async function () { - bodyForFindByUri() +describe('bodyForFindByUri', function () { + it('queries for uri', function () { + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 100, + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + const params = { + all_items: false, + uri: 'b15781267', + items_size: 100, + items_from: 0, + merge_checkin_card_items: true, + include_item_aggregations: true + } + const barcodes = {} + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) + + it('accepts item params', function () { + const barcodes = { 'Not Available': ['1234'] } + const params = { + all_items: false, + uri: 'b15781267', + items_size: 10, + items_from: 10, + merge_checkin_card_items: true, + include_item_aggregations: true + } + + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 10, + from: 10, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) }) From a5e190e1bb8be71376d3f2feb0452cf997ab02f3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 23 Jan 2026 11:56:29 -0500 Subject: [PATCH 34/36] Remove options from elastic-query --- lib/elasticsearch/elastic-query-builder.js | 2 +- lib/elasticsearch/elastic-query.js | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index de87a263..cc87333b 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -14,7 +14,7 @@ const POPULARITY_BOOSTS = [ class ElasticQueryBuilder { constructor (apiRequest, options = {}) { this.request = apiRequest - this.query = new ElasticQuery(options) + this.query = new ElasticQuery() // Break on search_scope: switch (this.request.params.search_scope) { diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index 68dfd30f..aa2d2f1e 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -7,11 +7,10 @@ **/ class ElasticQuery { - constructor (options = {}) { + constructor () { this.musts = [] this.shoulds = [] this.filters = [] - this.options = options } addMust (clause) { @@ -43,7 +42,7 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { + if (!this.musts.length && !this.shoulds.length && !this.filters.length) { return { match_all: {} } @@ -57,7 +56,7 @@ class ElasticQuery { if (this.shoulds.length) { result.bool.should = this.shoulds } - if (this.filters.length || this.options.items) { + if (this.filters.length) { result.bool.filter = this.filters } From 59901659d28e5b19ee764364a79516a7c02f53f9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 13:23:39 -0500 Subject: [PATCH 35/36] Remove irrelevant code --- lib/elasticsearch/elastic-body-builder.js | 4 -- lib/resources.js | 20 +++----- lib/utils/resource-helpers.js | 6 +-- test/resources.test.js | 60 +++++++++++++---------- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 110917b1..aa5c45a9 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -30,11 +30,8 @@ const bodyForFindByUri = function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - // const filter = returnAllItems ? {} : { filter: [] } - const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } - // Establish base query: const body = { _source: { excludes @@ -116,7 +113,6 @@ const bodyForSearch = function (params) { } ) - // return withInnerHits return body } diff --git a/lib/resources.js b/lib/resources.js index fb8c1694..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -30,10 +30,6 @@ const { const { bodyForFindByUri, - itemsFilterContext, - itemsQueryContext, - buildElasticQuery, - buildElasticBody, bodyForSearch, aggregationQueriesForParams, bodyForAggregation @@ -64,7 +60,7 @@ module.exports = function (app, _private = null) { }) // Validate uri: - await nyplSourceAndId(params) + await nyplSourceAndId(params.uri) // If we need to return itemAggregations or filter on item_status, // then we need to pre-retrieve SCSB item statuses to incorporate them into @@ -82,7 +78,7 @@ module.exports = function (app, _private = null) { } } - const body = await bodyForFindByUri(recapBarcodesByStatus, params) + const body = bodyForFindByUri(recapBarcodesByStatus, params) app.logger.debug('Resources#findByUri', body) let resp = await app.esClient.search(body) // Mindfully throw errors for known issues: @@ -107,7 +103,7 @@ module.exports = function (app, _private = null) { app.resources.annotatedMarc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const { id, nyplSource } = await nyplSourceAndId(params) + const { id, nyplSource } = await nyplSourceAndId(params.uri) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) @@ -117,13 +113,13 @@ module.exports = function (app, _private = null) { throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) } - return await AnnotatedMarcSerializer.serialize(resp.data) + return AnnotatedMarcSerializer.serialize(resp.data) } // Get a single raw marc: app.resources.marc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const { id, nyplSource } = await nyplSourceAndId(params) + const { id, nyplSource } = await nyplSourceAndId(params.uri) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) @@ -133,7 +129,7 @@ module.exports = function (app, _private = null) { throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) } - return await MarcSerializer.serialize(resp.data) + return MarcSerializer.serialize(resp.data) } // Get deliveryLocations for given resource(s) @@ -259,12 +255,8 @@ module.exports = function (app, _private = null) { // For unit testing, export private methods if second arg given: if (_private && typeof _private === 'object') { - _private.buildElasticBody = buildElasticBody - _private.buildElasticQuery = buildElasticQuery _private.parseSearchParams = parseSearchParams _private.esRangeValue = esRangeValue - _private.itemsFilterContext = itemsFilterContext - _private.itemsQueryContext = itemsQueryContext _private.aggregationQueriesForParams = aggregationQueriesForParams _private.mergeAggregationsResponses = mergeAggregationsResponses } diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 9ba7ea9d..50b6c9c5 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -60,11 +60,11 @@ const parseSearchParams = function (params, overrideParams = {}) { }) } -const nyplSourceAndId = async function (params) { +const nyplSourceAndId = async function (uri) { const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} + const { id, nyplSource } = nyplSourceMapper.splitIdentifier(uri) ?? {} if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) + throw new errors.InvalidParameterError(`Invalid bnum: ${uri}`) } return { id, nyplSource } } diff --git a/test/resources.test.js b/test/resources.test.js index 98220b26..ebc3c313 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -5,6 +5,12 @@ const scsbClient = require('../lib/scsb-client') const errors = require('../lib/errors') const { AGGREGATIONS_SPEC } = require('../lib/elasticsearch/config') const numAggregations = Object.keys(AGGREGATIONS_SPEC).length +const { + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody +} = require('../lib/elasticsearch/elastic-body-builder') const fixtures = require('./fixtures') @@ -72,7 +78,7 @@ describe('Resources query', function () { describe('buildElasticQuery', function () { it('uses "query string query" if subjectLiteral: used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -83,7 +89,7 @@ describe('Resources query', function () { it('uses "query string query" if subjectLiteral: quoted phrase used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -94,7 +100,7 @@ describe('Resources query', function () { it('escapes colon if field not recognized', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'fladeedle:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -105,7 +111,7 @@ describe('Resources query', function () { it('uses "query string query" if plain keyword query used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -116,7 +122,7 @@ describe('Resources query', function () { it('accepts advanced search parameters', function () { const params = resourcesPrivMethods.parseSearchParams({ contributor: 'Poe', title: 'Raven', subject: 'ravens' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.nested.include({ // Expect a title match on Raven: @@ -135,7 +141,7 @@ describe('Resources query', function () { describe('buildElasticBody', function () { it('uses subjectLiteral.raw when given a subjectLiteral filter', function () { const params = resourcesPrivMethods.parseSearchParams({ q: '', filters: { subjectLiteral: 'United States -- History' } }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query).to.be.a('object') expect(body.query.bool).to.be.a('object') @@ -151,7 +157,7 @@ describe('Resources query', function () { expect(process.env.HIDE_NYPL_SOURCE).to.be.a('undefined') const params = resourcesPrivMethods.parseSearchParams({ q: '' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query.filter).to.be.a('undefined') @@ -161,7 +167,7 @@ describe('Resources query', function () { process.env.HIDE_NYPL_SOURCE = 'recap-hl' const params = resourcesPrivMethods.parseSearchParams({ q: '' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) // Expect query to resemble: {"from":0,"size":50,"query":{"bool":{"filter":[{"bool":{"must_not":{"terms":{"nyplSource":["recap-hl"]}}}}]}},"sort":["uri"]} expect(body).to.be.a('object') @@ -173,26 +179,26 @@ describe('Resources query', function () { it('processes isbn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ isbn: '0689844921' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested .include({ 'query.bool.must[0].term.idIsbn\\.clean': '0689844921' }) }) it('processes issn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ issn: '1234-5678' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idIssn\\.clean': '1234-5678' }) }) it('processes lccn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ lccn: '00068799' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].regexp.idLccn.value': '[^\\d]*00068799[^\\d]*' }) }) it('processes oclc correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ oclc: '1033548057' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idOclc': '1033548057' }) }) @@ -205,9 +211,9 @@ describe('Resources query', function () { }) const paramsSnapshot = JSON.stringify(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) expect(JSON.stringify(params)).to.equal(paramsSnapshot) }) @@ -648,40 +654,40 @@ describe('Resources query', function () { describe('itemsFilterContext', () => { it('should return an empty object in case of no query', () => { - expect(resourcesPrivMethods.itemsFilterContext({})).to.deep.equal({}) + expect(itemsFilterContext({})).to.deep.equal({}) }) it('should return an empty object in case there are no filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: {} })).to.deep.equal({}) + expect(itemsFilterContext({ query: {} })).to.deep.equal({}) }) it('should return filters for volume in case there is a volume', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { volume: [1, 2] } })) + expect(itemsFilterContext({ query: { volume: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.volumeRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for date in case there is a date', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { date: [1, 2] } })) + expect(itemsFilterContext({ query: { date: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.dateRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for format in case there is a format', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) + expect(itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) .to.deep.equal({ filter: [{ terms: { 'items.formatLiteral': ['text', 'microfilm', 'AV'] } }] }) }) it('should return filters for location in case there is a location', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) it('should return filters for status in case there is a status', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { status: ['Available', 'Unavailable', 'In Process'] } })) + expect(itemsFilterContext({ query: { status: ['Available', 'Unavailable', 'In Process'] } })) .to.deep.equal({ filter: [{ terms: { 'items.status.id': ['Available', 'Unavailable', 'In Process'] } }] }) }) it('should combine all filters in case of multiple filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ + expect(itemsFilterContext({ query: { volume: [1, 2], date: [3, 4], @@ -701,24 +707,24 @@ describe('Resources query', function () { }) it('should ignore all other parameters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) }) describe('itemsQueryContext', () => { it('should exclude check in card items when options.merge_checkin_card_items is not set', () => { - expect(resourcesPrivMethods.itemsQueryContext({})) + expect(itemsQueryContext({})) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should exclude check in card items when merge_checkin_card_items is falsey', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: false })) + expect(itemsQueryContext({ merge_checkin_card_items: false })) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should use match_all for items when merge_checkin_card_items is truthy', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: true })) + expect(itemsQueryContext({ merge_checkin_card_items: true })) .to.deep.equal({ must: { match_all: {} } }) }) }) From 020c75db0f295950b79e8f6b61c0be62fdc0f80e Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 14:25:18 -0500 Subject: [PATCH 36/36] Remove spurious merge code --- lib/resources.js | 69 ------------------------------------------------ 1 file changed, 69 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 0a055238..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -37,75 +37,6 @@ const { const RESOURCES_INDEX = process.env.RESOURCES_INDEX -<<<<<<< scc-5050-2 -======= -const ITEM_FILTER_AGGREGATIONS = { - item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, - item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, - item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } -} - -// Configure sort fields: -const SORT_FIELDS = { - title: { - initialDirection: 'asc', - field: 'title_sort' - }, - date: { - initialDirection: 'desc', - field: 'dateStartYear' - }, - creator: { - initialDirection: 'asc', - field: 'creator_sort' - }, - relevance: {} -} - -// The following fields can be excluded from ES responses because we don't pass them to client: -const EXCLUDE_FIELDS = [ - 'uris', - '*_packed', - '*_sort', - 'items.*_packed', - 'contentsTitle', - 'suppressed', - // Hide contributor and creator transformed fields: - '*WithoutDates', - '*Normalized' -] - -// Configure controller-wide parameter parsing: -const parseSearchParams = function (params, overrideParams = {}) { - return parseParams(params, { - q: { type: 'string' }, - page: { type: 'int', default: 1 }, - per_page: { type: 'int', default: 50, range: [0, 100] }, - field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, - sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, - sort_direction: { type: 'string', range: ['asc', 'desc'] }, - search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, - filters: { type: 'hash', fields: FILTER_CONFIG }, - items_size: { type: 'int', default: 100, range: [0, 200] }, - items_from: { type: 'int', default: 0 }, - callnumber: { type: 'string' }, - standard_number: { type: 'string' }, - contributor: { type: 'string' }, - title: { type: 'string' }, - subject: { type: 'string' }, - subject_prefix: { type: 'string' }, - isbn: { type: 'string' }, - issn: { type: 'string' }, - lccn: { type: 'string' }, - oclc: { type: 'string' }, - role: { type: 'string' }, - merge_checkin_card_items: { type: 'boolean', default: true }, - include_item_aggregations: { type: 'boolean', default: true }, - ...overrideParams - }) -} - ->>>>>>> main // These are the handlers made available to the router: module.exports = function (app, _private = null) { app.resources = {}