From 76857228e3555780060804f1105b7910f718ea9b Mon Sep 17 00:00:00 2001 From: Paul Beaudoin Date: Thu, 10 Jul 2025 12:40:17 -0400 Subject: [PATCH 01/82] Override agg-self-filtering behavior for open search Trying out overriding the policy of _not_ applying namesake filter to aggregations when there is no other search criteria https://newyorkpubliclibrary.atlassian.net/browse/SCC-4789 --- lib/resources.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/resources.js b/lib/resources.js index faf71c79..80682dde 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -697,6 +697,13 @@ module.exports = function (app, _private = null) { // - one agg representing the counts for all properties _not_ used in filter // - one agg each for each property that is used in a filter, but counts should exclude that filter + // If the search query consists of only a single filter (or a couple + // filters of same type), just do a normal self-filtering aggregation + // for all properties: + if (!params.q && Object.keys(params.filters).length === 1) { + return [buildElasticAggregationsBody(params, Object.keys(AGGREGATIONS_SPEC))] + } + // Build the standard aggregation: const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) // Aggregate on all properties that aren't involved in filters: From 579eec58cbb9cc84e3316cd5cacd3a15c585596f Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 1 Dec 2025 11:15:02 -0500 Subject: [PATCH 02/82] Fix query structure --- lib/elasticsearch/config.js | 3 +- lib/elasticsearch/cql_grammar.js | 15 +++++ lib/elasticsearch/cql_query_builder.js | 90 ++++++++++++++++++++++++++ lib/resources.js | 15 ++++- package-lock.json | 10 +++ package.json | 1 + 6 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 lib/elasticsearch/cql_grammar.js create mode 100644 lib/elasticsearch/cql_query_builder.js diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index b63c3545..494dbb15 100644 --- 
a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -73,7 +73,8 @@ const SEARCH_SCOPES = { }, standard_number: { // We do custom field matching for this search-scope - } + }, + cql: {} } const FILTER_CONFIG = { diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js new file mode 100644 index 00000000..24cfb489 --- /dev/null +++ b/lib/elasticsearch/cql_grammar.js @@ -0,0 +1,15 @@ +const { Grammars } = require('ebnf') + +const cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "and" | "or" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " key | key + scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" + relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" + key ::= [a-z]* | '"' key '"' +` + +let cqlParser = new Grammars.W3C.Parser(cql) + +module.exports = { cqlParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js new file mode 100644 index 00000000..8ea7498e --- /dev/null +++ b/lib/elasticsearch/cql_query_builder.js @@ -0,0 +1,90 @@ +const { cqlParser } = require('./cql_grammar') +const ElasticQueryBuilder = require('./elastic-query-builder') +const ApiRequest = require('../api-request') + +function buildEsQuery (cqlQuery) { + const tree = cqlParser.getAST(cqlQuery) + console.log('tree: ', tree) + return buildEsQueryFromTree(tree) +} + +/** + this is mostly there but needs to handle exact strings + */ + +function buildEsQueryFromTree (tree) { + switch (tree.type) { + case 'query': + if (tree.children.length > 1) { + return buildBoolean( + buildEsQueryFromTree(tree.children[0]), + tree.children[1].text, + buildEsQueryFromTree(tree.children[2]) + ) + } else { + return buildEsQueryFromTree(tree.children[0]) + } + break + case 'sub_query': + return buildEsQueryFromTree(tree.children.length > 1 ? 
tree.children[1] : tree.children[0]) + break + case 'atomic_query': + let scope + let relation + let term + if (tree.children.length > 1) { + scope = tree.children[0].text + relation = tree.children[1].text + term = tree.children[2].text + } else { + scope = "all" + relation = "any" + term = tree.children[0].text + } + return buildAtomic(scope, relation, term) + break + default: + break + } +} + +function buildBoolean (queryOne, operator, queryTwo) { + console.log('building boolean ', queryOne, operator, queryTwo) + const esOperator = operator === 'and' ? 'must' : 'should' + return { + "bool": { + [esOperator]: [ + queryOne, + queryTwo + ] + } + } +} + +function buildAtomic (scope, relation, term) { + console.log('building atomic: ', scope, relation, term) + const request = ApiRequest.fromParams({ + q: term, + search_scope: scope + }) + const builder = ElasticQueryBuilder.forApiRequest(request) + // return { + // query: builder.query.toJson() + // } + return builder.query.toJson() + // return { + // "query": { + // "multi_match" : { + // "query": term, + // "fields": [ "subject", "message" ] + // } + // } + // } +} + +module.exports = { + buildEsQuery, + buildEsQueryFromTree, + buildBoolean, + buildAtomic +} diff --git a/lib/resources.js b/lib/resources.js index 57532f96..134abdad 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,6 +19,7 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') +const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -620,12 +621,19 @@ module.exports = function (app, _private = null) { let body = buildElasticBody(params) + console.log('body: ', body) + // Strip unnecessary _source fields body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) } - body = 
addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + if (params.search_scope !== 'cql') { + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + } + + + console.log('body after inner: ', body) app.logger.debug('Resources#search', RESOURCES_INDEX, body) @@ -860,6 +868,11 @@ const buildElasticBody = function (params) { * @return {object} ES query object suitable to be POST'd to ES endpoint */ const buildElasticQuery = function (params) { + if (params.search_scope === 'cql') { + query = cqlQueryBuilder.buildEsQuery(params.q) + console.log('built cql query for query: ', params.q, 'query: ', query) + return query + } const request = ApiRequest.fromParams(params) const builder = ElasticQueryBuilder.forApiRequest(request) diff --git a/package-lock.json b/package-lock.json index 792258c5..193bd7d4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" @@ -9666,6 +9667,15 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "optional": true }, + "node_modules/ebnf": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ebnf/-/ebnf-1.9.1.tgz", + "integrity": "sha512-uW2UKSsuty9ANJ3YByIQE4ANkD8nqUPO7r6Fwcc1ADKPe9FRdcPpMl3VEput4JSvKBJ4J86npIC2MLP0pYkCuw==", + "license": "MIT", + "bin": { + "ebnf": "dist/bin.js" + } + }, "node_modules/ecc-jsbn": { "version": "0.1.2", "dev": true, diff --git a/package.json b/package.json index 435131f3..7ffe5b80 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": 
"^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" From ae28b7299c952f55a33e958688f43c335ea00139 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:00:39 -0500 Subject: [PATCH 03/82] Add more permissive key structure --- lib/elasticsearch/cql_grammar.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 24cfb489..0de4df11 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -7,7 +7,9 @@ const cql = ` atomic_query ::= scope " " relation " " key | key scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= [a-z]* | '"' key '"' + key ::= NON_WS_KEY | '"' KEYPHRASE '"' + KEYPHRASE ::= [^"]+ + NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ ` let cqlParser = new Grammars.W3C.Parser(cql) From 8b6000fd8a551be46f4ea074de60f40452021aaa Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:03:10 -0500 Subject: [PATCH 04/82] Remove console logs and commented code --- lib/elasticsearch/cql_query_builder.js | 14 -------------- lib/resources.js | 6 ------ 2 files changed, 20 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 8ea7498e..69f8fe01 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -4,7 +4,6 @@ const ApiRequest = require('../api-request') function buildEsQuery (cqlQuery) { const tree = cqlParser.getAST(cqlQuery) - console.log('tree: ', tree) return buildEsQueryFromTree(tree) } @@ -49,7 +48,6 @@ function buildEsQueryFromTree (tree) { } function buildBoolean (queryOne, operator, queryTwo) { - console.log('building boolean ', queryOne, operator, queryTwo) const 
esOperator = operator === 'and' ? 'must' : 'should' return { "bool": { @@ -62,24 +60,12 @@ function buildBoolean (queryOne, operator, queryTwo) { } function buildAtomic (scope, relation, term) { - console.log('building atomic: ', scope, relation, term) const request = ApiRequest.fromParams({ q: term, search_scope: scope }) const builder = ElasticQueryBuilder.forApiRequest(request) - // return { - // query: builder.query.toJson() - // } return builder.query.toJson() - // return { - // "query": { - // "multi_match" : { - // "query": term, - // "fields": [ "subject", "message" ] - // } - // } - // } } module.exports = { diff --git a/lib/resources.js b/lib/resources.js index 134abdad..9c5e13cc 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -621,8 +621,6 @@ module.exports = function (app, _private = null) { let body = buildElasticBody(params) - console.log('body: ', body) - // Strip unnecessary _source fields body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -632,9 +630,6 @@ module.exports = function (app, _private = null) { body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } - - console.log('body after inner: ', body) - app.logger.debug('Resources#search', RESOURCES_INDEX, body) return app.esClient.search(body) @@ -870,7 +865,6 @@ const buildElasticBody = function (params) { const buildElasticQuery = function (params) { if (params.search_scope === 'cql') { query = cqlQueryBuilder.buildEsQuery(params.q) - console.log('built cql query for query: ', params.q, 'query: ', query) return query } const request = ApiRequest.fromParams(params) From e5b61dc79e0a3fef407ea5f3c411297070264465 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:40:12 -0500 Subject: [PATCH 05/82] Fix linter errors --- lib/elasticsearch/cql_grammar.js | 4 ++-- lib/elasticsearch/cql_query_builder.js | 12 +++++------- lib/resources.js | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git 
a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 0de4df11..be8cceca 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,4 +1,4 @@ -const { Grammars } = require('ebnf') +const { Grammars } = require('ebnf') const cql = ` query ::= sub_query " " connective " " query | sub_query @@ -12,6 +12,6 @@ const cql = ` NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ ` -let cqlParser = new Grammars.W3C.Parser(cql) +const cqlParser = new Grammars.W3C.Parser(cql) module.exports = { cqlParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 69f8fe01..a26c7e38 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -23,11 +23,9 @@ function buildEsQueryFromTree (tree) { } else { return buildEsQueryFromTree(tree.children[0]) } - break case 'sub_query': return buildEsQueryFromTree(tree.children.length > 1 ? tree.children[1] : tree.children[0]) - break - case 'atomic_query': + case 'atomic_query': { let scope let relation let term @@ -36,12 +34,12 @@ function buildEsQueryFromTree (tree) { relation = tree.children[1].text term = tree.children[2].text } else { - scope = "all" - relation = "any" + scope = 'all' + relation = 'any' term = tree.children[0].text } return buildAtomic(scope, relation, term) - break + } default: break } @@ -50,7 +48,7 @@ function buildEsQueryFromTree (tree) { function buildBoolean (queryOne, operator, queryTwo) { const esOperator = operator === 'and' ? 
'must' : 'should' return { - "bool": { + bool: { [esOperator]: [ queryOne, queryTwo diff --git a/lib/resources.js b/lib/resources.js index 9c5e13cc..aecf9658 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -864,7 +864,7 @@ const buildElasticBody = function (params) { */ const buildElasticQuery = function (params) { if (params.search_scope === 'cql') { - query = cqlQueryBuilder.buildEsQuery(params.q) + const query = cqlQueryBuilder.buildEsQuery(params.q) return query } const request = ApiRequest.fromParams(params) From b85f3fb246578c1e6ab60b319f7d1609d40d751d Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 12:25:28 -0500 Subject: [PATCH 06/82] Move esRangeValue --- lib/elasticsearch/elastic-body-builder.js | 0 lib/resources.js | 27 +-------------------- lib/utils/resource-helpers.js | 29 +++++++++++++++++++++++ 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 lib/elasticsearch/elastic-body-builder.js create mode 100644 lib/utils/resource-helpers.js diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js new file mode 100644 index 00000000..e69de29b diff --git a/lib/resources.js b/lib/resources.js index 57532f96..5c51f029 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -22,6 +22,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') +const { esRangeValue } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -395,32 +396,6 @@ module.exports = function (app, _private = null) { return body } - /** - * Given a range represented as an array, returns a corresponding ES range object - * - * @param {Array.} range - An array consisting of a single date or a pair of dates - * @returns {object} - */ - const esRangeValue = (range) => { - // the greater-than-equal value will always be the 
first value in the range array. - // depending on the number of values and their equality, we query using less-than-equal - // the second value, or just less-than the first value plus one - - // Treat case where range start equals range end same as case of single value: - if (range[0] === range[1]) range = range.slice(0, 1) - const rangeQuery = { - gte: range[0] - } - if (range.length === 2) { - // search on both range values - rangeQuery.lte = range[range.length - 1] - } else if (range.length === 1) { - // if there is just one range, query up until the next year - rangeQuery.lt = range[0] + 1 - } - return rangeQuery - } - /** * Given an object containing filters, * returns content of the ES query filter context diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js new file mode 100644 index 00000000..fb535c1b --- /dev/null +++ b/lib/utils/resource-helpers.js @@ -0,0 +1,29 @@ +/** + * Given a range represented as an array, returns a corresponding ES range object + * + * @param {Array.} range - An array consisting of a single date or a pair of dates + * @returns {object} + */ +const esRangeValue = (range) => { + // the greater-than-equal value will always be the first value in the range array. 
+ // depending on the number of values and their equality, we query using less-than-equal + // the second value, or just less-than the first value plus one + + // Treat case where range start equals range end same as case of single value: + if (range[0] === range[1]) range = range.slice(0, 1) + const rangeQuery = { + gte: range[0] + } + if (range.length === 2) { + // search on both range values + rangeQuery.lte = range[range.length - 1] + } else if (range.length === 1) { + // if there is just one range, query up until the next year + rangeQuery.lt = range[0] + 1 + } + return rangeQuery +} + +module.exports = { + esRangeValue +} From a02a058f0f83a2605de2daf47d01a3fdc831dae6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 12:32:24 -0500 Subject: [PATCH 07/82] Move parseParams --- lib/elasticsearch/config.js | 41 ++++++++++++++++++++- lib/resources.js | 69 +---------------------------------- lib/utils/resource-helpers.js | 35 +++++++++++++++++- 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index b63c3545..18ef399c 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -129,8 +129,47 @@ const AGGREGATIONS_SPEC = { collection: { terms: { field: 'collectionIds' } } } +const ITEM_FILTER_AGGREGATIONS = { + item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, + item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, + item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } +} + +// Configure sort fields: +const SORT_FIELDS = { + title: { + initialDirection: 'asc', + field: 'title_sort' + }, + date: { + initialDirection: 'desc', + field: 'dateStartYear' + }, + creator: { + initialDirection: 'asc', + field: 'creator_sort' + }, + relevance: {} +} + +// The 
following fields can be excluded from ES responses because we don't pass them to client: +const EXCLUDE_FIELDS = [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + // Hide contributor and creator transformed fields: + '*WithoutDates', + '*Normalized' +] + module.exports = { SEARCH_SCOPES, FILTER_CONFIG, - AGGREGATIONS_SPEC + AGGREGATIONS_SPEC, + ITEM_FILTER_AGGREGATIONS, + EXCLUDE_FIELDS, + SORT_FIELDS } diff --git a/lib/resources.js b/lib/resources.js index 5c51f029..319b9836 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,78 +19,13 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX -const ITEM_FILTER_AGGREGATIONS = { - item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, - item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, - item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } -} - -// Configure sort fields: -const SORT_FIELDS = { - title: { - initialDirection: 'asc', - field: 'title_sort' - }, - date: { - initialDirection: 'desc', - field: 'dateStartYear' - }, - creator: { - initialDirection: 'asc', - field: 'creator_sort' - }, - relevance: {} -} - -// The following fields can be excluded from ES responses because we don't pass them to 
client: -const EXCLUDE_FIELDS = [ - 'uris', - '*_packed', - '*_sort', - 'items.*_packed', - 'contentsTitle', - 'suppressed', - // Hide contributor and creator transformed fields: - '*WithoutDates', - '*Normalized' -] - -// Configure controller-wide parameter parsing: -const parseSearchParams = function (params, overrideParams = {}) { - return parseParams(params, { - q: { type: 'string' }, - page: { type: 'int', default: 1 }, - per_page: { type: 'int', default: 50, range: [0, 100] }, - field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, - sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, - sort_direction: { type: 'string', range: ['asc', 'desc'] }, - search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, - filters: { type: 'hash', fields: FILTER_CONFIG }, - items_size: { type: 'int', default: 100, range: [0, 200] }, - items_from: { type: 'int', default: 0 }, - callnumber: { type: 'string' }, - standard_number: { type: 'string' }, - contributor: { type: 'string' }, - title: { type: 'string' }, - subject: { type: 'string' }, - subject_prefix: { type: 'string' }, - isbn: { type: 'string' }, - issn: { type: 'string' }, - lccn: { type: 'string' }, - oclc: { type: 'string' }, - merge_checkin_card_items: { type: 'boolean', default: true }, - include_item_aggregations: { type: 'boolean', default: true }, - ...overrideParams - }) -} - // These are the handlers made available to the router: module.exports = function (app, _private = null) { app.resources = {} diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index fb535c1b..28ae2993 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -1,3 +1,6 @@ +const { parseParams } = require('../util') +const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') + /** * Given a range represented as an array, returns a corresponding ES range object * @@ -24,6 
+27,36 @@ const esRangeValue = (range) => { return rangeQuery } +// Configure controller-wide parameter parsing: +const parseSearchParams = function (params, overrideParams = {}) { + return parseParams(params, { + q: { type: 'string' }, + page: { type: 'int', default: 1 }, + per_page: { type: 'int', default: 50, range: [0, 100] }, + field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, + sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, + sort_direction: { type: 'string', range: ['asc', 'desc'] }, + search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, + filters: { type: 'hash', fields: FILTER_CONFIG }, + items_size: { type: 'int', default: 100, range: [0, 200] }, + items_from: { type: 'int', default: 0 }, + callnumber: { type: 'string' }, + standard_number: { type: 'string' }, + contributor: { type: 'string' }, + title: { type: 'string' }, + subject: { type: 'string' }, + subject_prefix: { type: 'string' }, + isbn: { type: 'string' }, + issn: { type: 'string' }, + lccn: { type: 'string' }, + oclc: { type: 'string' }, + merge_checkin_card_items: { type: 'boolean', default: true }, + include_item_aggregations: { type: 'boolean', default: true }, + ...overrideParams + }) +} + module.exports = { - esRangeValue + esRangeValue, + parseSearchParams } From 0beec4c3a5aa96a447ffcd54a38bafa4ff91b626 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:17:15 -0500 Subject: [PATCH 08/82] Refactor nyplSourc/id calculation and move nyplSourceAndId to utils' --- lib/resources.js | 8 ++------ lib/utils/resource-helpers.js | 14 +++++++++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 319b9836..6c5d1f70 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -22,7 +22,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = 
require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -49,11 +49,7 @@ module.exports = function (app, _private = null) { }) // Validate uri: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + await nyplSourceAndId(params) // If we need to return itemAggregations or filter on item_status, // then we need to pre-retrieve SCSB item statuses to incorporate them into diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 28ae2993..42629299 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -1,5 +1,7 @@ const { parseParams } = require('../util') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') +const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') +const errors = require('../errors') /** * Given a range represented as an array, returns a corresponding ES range object @@ -56,7 +58,17 @@ const parseSearchParams = function (params, overrideParams = {}) { }) } +const nyplSourceAndId = async function (params) { + const nyplSourceMapper = await NyplSourceMapper.instance() + const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? 
{} + if (!id || !nyplSource) { + throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) + } + return { id, nyplSource } +} + module.exports = { esRangeValue, - parseSearchParams + parseSearchParams, + nyplSourceAndId } From a7498788f3fcc8dcdd12c568c2b47c25043c67e3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:50:50 -0500 Subject: [PATCH 09/82] Add bodybuilder methods for findByUri --- lib/elasticsearch/elastic-body-builder.js | 329 ++++++++++++++++++++++ lib/resources.js | 321 +-------------------- 2 files changed, 334 insertions(+), 316 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e69de29b..655ca1c9 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -0,0 +1,329 @@ +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS } = require('./config') +const { deepValue } = require('../util') +const { esRangeValue } = require('../utils/resource-helpers') + +/** + * Given a ES search body, returns same object modified to include the + * additional query necessary to limit (and paginate through) items + * + * @param {object} body - An ES query object (suitable for POSTing to ES + * @param {object} options - An object optionally defining `size` and `from` + * for limiting and paginating through items + */ +const addInnerHits = (body, _options = {}) => { + const options = Object.assign({ + size: process.env.SEARCH_ITEMS_SIZE || 200, + from: 0, + merge_checkin_card_items: true + }, _options) + + // Make sure necessary structure exists: + if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { + body.query = { bool: {} } + } + + // The place to add the filter depends on the query built to this point: + const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) + // Initialize filter object if it doesn't already exist: + placeToAddFilter.filter = 
placeToAddFilter.filter || [] + // If filter object already exists, convert it to array: + if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] + + const itemsQuery = { + bool: Object.assign( + itemsQueryContext(options), + itemsFilterContext(options) + ) + } + + const wrappedItemsQuery = { + bool: { + should: [ + { + nested: { + path: 'items', + query: itemsQuery, + inner_hits: { + sort: [{ 'items.enumerationChronology_sort': 'desc' }], + size: options.size, + from: options.from, + name: 'items' + } + } + }, + // Add a catch-all to ensure we return the bib document even when + // numItems=0 or applied item filters exclude all items: + { match_all: {} } + ] + } + } + placeToAddFilter.filter.push(wrappedItemsQuery) + + // If there is any item query at all, run an additional inner_hits query + // to retrieve the total number of items without filtering: + if (itemsQuery.bool.filter) { + wrappedItemsQuery.bool.should.push({ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }) + } + + return body +} + +/** + * Given an object containing filters, + * returns content of the ES query filter context + * + * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] + * @returns {object} + */ +const itemsFilterContext = (options) => { + if (!options.query) return {} + + const filterHandlers = { + volume: (volumes) => { + return { + range: { + 'items.volumeRange': esRangeValue(volumes) + } + } + }, + date: (dates) => { + return { + range: { + 'items.dateRange': esRangeValue(dates) + } + } + }, + format: (formats) => { + return { + terms: { + 'items.formatLiteral': formats + } + } + }, + location: (locations) => { + return { + terms: { + 'items.holdingLocation.id': locations + } + } + }, + status: (statuses) => { + // Determine if all possible ReCAP statuses were selected: + const 
selectedRecapStatuses = recapStatuses(statuses) + + if (selectedRecapStatuses.length === 1 && + Array.isArray(options.unavailable_recap_barcodes) && + options.unavailable_recap_barcodes.length > 0) { + // There are known unavailable ReCAP items, so build a complicated + // filter clause with appropriate barcode overrides: + return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) + } else { + // If there are no known unavailable ReCAP items, just do a straight + // status match: + return { + terms: { + 'items.status.id': statuses + } + } + } + }, + itemUri: (uri) => { + return { term: { 'items.uri': uri } } + } + } + + const filters = Object.keys(options.query).map((filter) => { + const value = options.query[filter] + const handler = filterHandlers[filter] + return value && handler ? handler(value) : null + }).filter((x) => x) + + return filters.length + ? { filter: filters } + : {} +} + +/** + * Given an array of status ids (e.g. "status:a", "status:na") returns the + * subset of statuses that are relevant in ReCAP + */ +const recapStatuses = (statuses) => { + return statuses + .filter((status) => ['status:a', 'status:na'].includes(status)) +} + +/** + * Builds a big complicated ES filter to allow us to filter items by status, + * but override the indexed status for ReCAP items with statuses retrieved + * from SCSB. 
This corrects for the fact that ReCAP item statuses tend to be + * wrong in the ES index: + * - partner items are indexed as Available and remain thus forever + * - NYPL item statuses _should_ equal SCSB status, but the mechanism + * for keeping them synced isn't perfect and operates on a delay + * + * @param {string[]} statuses - An array of statuses to filter on + * @param {string[]} unavailableRecapBarcodes - An array of item barcodes + * known to be unavailble + * + * Returns an ES filter that matches the desired statuses, but also uses + * the known unavailable items to override indexed item statuses for ReCAP + * items (because ReCAP is the authority for status of off-site items). + * Essentially, the criteria is for matching an item is: + * + * - if on-site (non-ReCAP): + * - has a matching indexed status + * - if off-site: + * - if filtering on status:na + * - item barcode must be in unavailableRecapBarcodes + * - if filtering on status:a: + * - item barcode must NOT be in unavailableRecapBarcodes + */ +const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { + // First, let's set up some common clauses: + + // Item is in ReCAP: + const itemIsRecapClause = { + regexp: { 'items.holdingLocation.id': 'loc:rc.*' } + } + // Item's indexed status matches one of the filtered statuses: + const itemHasIndexedStatusClause = { + terms: { 'items.status.id': statuses } + } + // Item is marked Unavailable in SCSB: + const itemIsUnavailableInRecapClause = { + script: { + script: { + inline: 'doc[\'items.idBarcode\'].value == null || ' + + 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', + lang: 'painless', + params: { unavailableRecapBarcodes } + } + } + } + // This function is only called if `statuses` param contains a single + // ReCAP-relevant status (i.e. 
status:a or status:na), so determine which + // ReCAP status to use: + const selectedRecapStatus = recapStatuses(statuses).shift() + // Item's ReCAP status agrees with filter: + const itemRecapStatusAgreesWithFilterClause = + selectedRecapStatus === 'status:na' + ? itemIsUnavailableInRecapClause + : { bool: { must_not: itemIsUnavailableInRecapClause } } + + return { + bool: { + should: [ + // Either 1) item is on-site and has correctly indexed status: + { + bool: { + must: [ + // Item is on-site (i.e. not recap): + { bool: { must_not: itemIsRecapClause } }, + // Item indexed status matches filter: + itemHasIndexedStatusClause + ] + } + }, + // Or 2) item is off-site and has a scsb status that agrees with the + // filter (e.g. if filtering on status:na, scsb marks the barcode as + // 'Not Available') + { + bool: { + must: [ + // Item is off-site: + JSON.parse(JSON.stringify(itemIsRecapClause)), + // Item is not marked unavailable + itemRecapStatusAgreesWithFilterClause + ] + } + } + ] + } + } +} + +/** + * Given an object containing query options, + * returns content of the ES query context + * + * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one + * that matters right now + * @returns {object} + */ +const itemsQueryContext = (options) => { + const excludeClauses = [] + + if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) + + return excludeClauses.length ? 
{ must_not: excludeClauses } : { must: { match_all: {} } } +} + +const bodyForFindByUri = async function (recapBarcodesByStatus, params) { + // Establish base query: + let body = { + _source: { + excludes: EXCLUDE_FIELDS + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: params.uri + } + } + ] + } + } + } + const paramsIncludesItemLevelFiltering = Object.keys(params) + .filter((param) => param.startsWith('item_')).length > 0 + const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + if (returnAllItems) { + body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') + } else { + // No specific item requested, so add pagination and matching params: + const itemsOptions = { + size: params.items_size, + from: params.items_from, + merge_checkin_card_items: params.merge_checkin_card_items, + query: { + volume: params.item_volume, + date: params.item_date, + format: params.item_format, + location: params.item_location, + status: params.item_status, + itemUri: params.itemUri + }, + unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] + } + body = addInnerHits(body, itemsOptions) + body._source = { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + } + if (params.include_item_aggregations) { + body.aggregations = ITEM_FILTER_AGGREGATIONS + } + return body +} + +module.exports = { + bodyForFindByUri, + addInnerHits, + itemsFilterContext, + recapStatuses, + itemStatusFilterWithUnavailableRecapItems, + itemsQueryContext +} diff --git a/lib/resources.js b/lib/resources.js index 6c5d1f70..f0b47d37 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,14 +15,15 @@ const ResponseMassager = require('./response_massager.js') const DeliveryLocationsResolver = require('./delivery-locations-resolver') const AvailableDeliveryLocationTypes = require('./available_delivery_location_types') -const { parseParams, deepValue } = require('../lib/util') +const { parseParams } = require('../lib/util') const ApiRequest = 
require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { AGGREGATIONS_SPEC, ITEM_FILTER_AGGREGATIONS, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') +const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -67,54 +68,8 @@ module.exports = function (app, _private = null) { : Promise.resolve({}) return scsbStatusLookup - .then((recapBarcodesByStatus) => { - // Establish base query: - let body = { - _source: { - excludes: EXCLUDE_FIELDS - }, - size: 1, - query: { - bool: { - must: [ - { - term: { - uri: params.uri - } - } - ] - } - } - } - const paramsIncludesItemLevelFiltering = Object.keys(params) - .filter((param) => param.startsWith('item_')).length > 0 - const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering - if (returnAllItems) { - body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') - } else { - // No specific item requested, so add pagination and matching params: - const itemsOptions = { - size: params.items_size, - from: params.items_from, - merge_checkin_card_items: params.merge_checkin_card_items, - query: { - volume: params.item_volume, - date: params.item_date, - format: params.item_format, - location: params.item_location, - status: params.item_status, - itemUri: params.itemUri - }, - unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] - } - body = addInnerHits(body, itemsOptions) - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - } - if (params.include_item_aggregations) { - body.aggregations = ITEM_FILTER_AGGREGATIONS - } + .then(async 
(recapBarcodesByStatus) => { + const body = await bodyForFindByUri(recapBarcodesByStatus, params) app.logger.debug('Resources#findByUri', body) return app.esClient.search(body) .then((resp) => { @@ -251,272 +206,6 @@ module.exports = function (app, _private = null) { .then((items) => ItemResultsSerializer.serialize(items, opts)) } - /** - * Given a ES search body, returns same object modified to include the - * additional query necessary to limit (and paginate through) items - * - * @param {object} body - An ES query object (suitable for POSTing to ES - * @param {object} options - An object optionally defining `size` and `from` - * for limiting and paginating through items - */ - const addInnerHits = (body, _options = {}) => { - const options = Object.assign({ - size: process.env.SEARCH_ITEMS_SIZE || 200, - from: 0, - merge_checkin_card_items: true - }, _options) - - // Make sure necessary structure exists: - if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { - body.query = { bool: {} } - } - - // The place to add the filter depends on the query built to this point: - const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) - // Initialize filter object if it doesn't already exist: - placeToAddFilter.filter = placeToAddFilter.filter || [] - // If filter object already exists, convert it to array: - if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] - - const itemsQuery = { - bool: Object.assign( - itemsQueryContext(options), - itemsFilterContext(options) - ) - } - - const wrappedItemsQuery = { - bool: { - should: [ - { - nested: { - path: 'items', - query: itemsQuery, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: options.size, - from: options.from, - name: 'items' - } - } - }, - // Add a catch-all to ensure we return the bib document even when - // numItems=0 or applied item filters exclude all items: - { match_all: 
{} } - ] - } - } - placeToAddFilter.filter.push(wrappedItemsQuery) - - // If there is any item query at all, run an additional inner_hits query - // to retrieve the total number of items without filtering: - if (itemsQuery.bool.filter) { - wrappedItemsQuery.bool.should.push({ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }) - } - - return body - } - - /** - * Given an object containing filters, - * returns content of the ES query filter context - * - * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] - * @returns {object} - */ - const itemsFilterContext = (options) => { - if (!options.query) return {} - - const filterHandlers = { - volume: (volumes) => { - return { - range: { - 'items.volumeRange': esRangeValue(volumes) - } - } - }, - date: (dates) => { - return { - range: { - 'items.dateRange': esRangeValue(dates) - } - } - }, - format: (formats) => { - return { - terms: { - 'items.formatLiteral': formats - } - } - }, - location: (locations) => { - return { - terms: { - 'items.holdingLocation.id': locations - } - } - }, - status: (statuses) => { - // Determine if all possible ReCAP statuses were selected: - const selectedRecapStatuses = recapStatuses(statuses) - - if (selectedRecapStatuses.length === 1 && - Array.isArray(options.unavailable_recap_barcodes) && - options.unavailable_recap_barcodes.length > 0) { - // There are known unavailable ReCAP items, so build a complicated - // filter clause with appropriate barcode overrides: - return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) - } else { - // If there are no known unavailable ReCAP items, just do a straight - // status match: - return { - terms: { - 'items.status.id': statuses - } - } - } - }, - itemUri: (uri) => { - return { term: { 'items.uri': uri } } - } - } - - const filters = 
Object.keys(options.query).map((filter) => { - const value = options.query[filter] - const handler = filterHandlers[filter] - return value && handler ? handler(value) : null - }).filter((x) => x) - - return filters.length - ? { filter: filters } - : {} - } - - /** - * Given an array of status ids (e.g. "status:a", "status:na") returns the - * subset of statuses that are relevant in ReCAP - */ - const recapStatuses = (statuses) => { - return statuses - .filter((status) => ['status:a', 'status:na'].includes(status)) - } - - /** - * Builds a big complicated ES filter to allow us to filter items by status, - * but override the indexed status for ReCAP items with statuses retrieved - * from SCSB. This corrects for the fact that ReCAP item statuses tend to be - * wrong in the ES index: - * - partner items are indexed as Available and remain thus forever - * - NYPL item statuses _should_ equal SCSB status, but the mechanism - * for keeping them synced isn't perfect and operates on a delay - * - * @param {string[]} statuses - An array of statuses to filter on - * @param {string[]} unavailableRecapBarcodes - An array of item barcodes - * known to be unavailble - * - * Returns an ES filter that matches the desired statuses, but also uses - * the known unavailable items to override indexed item statuses for ReCAP - * items (because ReCAP is the authority for status of off-site items). 
- * Essentially, the criteria is for matching an item is: - * - * - if on-site (non-ReCAP): - * - has a matching indexed status - * - if off-site: - * - if filtering on status:na - * - item barcode must be in unavailableRecapBarcodes - * - if filtering on status:a: - * - item barcode must NOT be in unavailableRecapBarcodes - */ - const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { - // First, let's set up some common clauses: - - // Item is in ReCAP: - const itemIsRecapClause = { - regexp: { 'items.holdingLocation.id': 'loc:rc.*' } - } - // Item's indexed status matches one of the filtered statuses: - const itemHasIndexedStatusClause = { - terms: { 'items.status.id': statuses } - } - // Item is marked Unavailable in SCSB: - const itemIsUnavailableInRecapClause = { - script: { - script: { - inline: 'doc[\'items.idBarcode\'].value == null || ' + - 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', - lang: 'painless', - params: { unavailableRecapBarcodes } - } - } - } - // This function is only called if `statuses` param contains a single - // ReCAP-relevant status (i.e. status:a or status:na), so determine which - // ReCAP status to use: - const selectedRecapStatus = recapStatuses(statuses).shift() - // Item's ReCAP status agrees with filter: - const itemRecapStatusAgreesWithFilterClause = - selectedRecapStatus === 'status:na' - ? itemIsUnavailableInRecapClause - : { bool: { must_not: itemIsUnavailableInRecapClause } } - - return { - bool: { - should: [ - // Either 1) item is on-site and has correctly indexed status: - { - bool: { - must: [ - // Item is on-site (i.e. not recap): - { bool: { must_not: itemIsRecapClause } }, - // Item indexed status matches filter: - itemHasIndexedStatusClause - ] - } - }, - // Or 2) item is off-site and has a scsb status that agrees with the - // filter (e.g. 
if filtering on status:na, scsb marks the barcode as - // 'Not Available') - { - bool: { - must: [ - // Item is off-site: - JSON.parse(JSON.stringify(itemIsRecapClause)), - // Item is not marked unavailable - itemRecapStatusAgreesWithFilterClause - ] - } - } - ] - } - } - } - - /** - * Given an object containing query options, - * returns content of the ES query context - * - * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one - * that matters right now - * @returns {object} - */ - const itemsQueryContext = (options) => { - const excludeClauses = [] - - if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) - - return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } - } - // Conduct a search across resources: app.resources.search = function (params, opts, request) { app.logger.debug('Unparsed params: ', params) From 683445f6c3d4103b3ad0b3b50ad46d8b87f034f0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 14:52:49 -0500 Subject: [PATCH 10/82] Add nyplSourceAndId call to annotatedMarc --- lib/resources.js | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index f0b47d37..b6726bfa 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -1,4 +1,3 @@ -const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const scsbClient = require('./scsb-client') const ResourceResultsSerializer = require('./jsonld_serializers.js').ResourceResultsSerializer @@ -97,12 +96,8 @@ module.exports = function (app, _private = null) { // Get a single raw annotated-marc resource: app.resources.annotatedMarc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? 
{} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + const { id, nyplSource } = await nyplSourceAndId(params) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) From 5ac57ee6c2ce65b1a2211e021e9f7c317e7db003 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:17:23 -0500 Subject: [PATCH 11/82] Move itemsByFilter to utils --- lib/resources.js | 54 ++--------------------------------- lib/utils/resource-helpers.js | 51 ++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 53 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index b6726bfa..82f9e112 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -5,7 +5,6 @@ const ResourceSerializer = require('./jsonld_serializers.js').ResourceSerializer const AggregationsSerializer = require('./jsonld_serializers.js').AggregationsSerializer const AggregationSerializer = require('./jsonld_serializers.js').AggregationSerializer const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer -const LocationLabelUpdater = require('./location_label_updater') const AnnotatedMarcSerializer = require('./annotated-marc-serializer') const { makeNyplDataApiClient } = require('./data-api-client') const { IndexSearchError, IndexConnectionError } = require('./errors') @@ -21,7 +20,7 @@ const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams, nyplSourceAndId } = require('./utils/resource-helpers') +const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -113,46 +112,6 @@ module.exports 
= function (app, _private = null) { .then(AnnotatedMarcSerializer.serialize) } - function itemsByFilter (filter, opts) { - opts = Object.assign({ - _source: null - }, opts) - - // Build ES query body: - const body = { - query: { - nested: { - path: 'items', - score_mode: 'avg', - query: { - constant_score: { - filter - } - } - } - } - } - if (opts._source) body._source = opts._source - - app.logger.debug('Resources#itemsByFilter', body) - return app.esClient.search(body) - .then((resp) => { - if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) - resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() - // Convert this ES bibs response into an array of flattened items: - return resp.hits.hits - .map((doc) => doc._source) - // Reduce to a flat array of items - .reduce((a, bib) => { - return a.concat(bib.items) - // Let's affix that bnum into the item's identifiers so we know where it came from: - .map((i) => { - return Object.assign(i, { identifier: [`urn:bnum:${bib.uri}`].concat(i.identifier) }) - }) - }, []) - }) - } - // Get deliveryLocations for given resource(s) app.resources.deliveryLocationsByBarcode = function (params, opts) { params = parseParams(params, { @@ -170,16 +129,7 @@ module.exports = function (app, _private = null) { }) // Create promise to resolve items: - const fetchItems = itemsByFilter( - { terms: { 'items.identifier': identifierValues } }, - { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } - - // Filter out any items (multi item bib) that don't match one of the queriered barcodes: - ).then((items) => { - return items.filter((item) => { - return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 - }) - }) + const fetchItems = itemsByFilter(identifierValues, app) // Run both item fetch and patron fetch in parallel: return 
Promise.all([fetchItems, lookupPatronType]) diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 42629299..e87d6e79 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -2,6 +2,7 @@ const { parseParams } = require('../util') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const errors = require('../errors') +const LocationLabelUpdater = require('../location_label_updater') /** * Given a range represented as an array, returns a corresponding ES range object @@ -67,8 +68,56 @@ const nyplSourceAndId = async function (params) { return { id, nyplSource } } +function itemsByFilter (identifierValues, app) { + const filter = { terms: { 'items.identifier': identifierValues } } + let opts = { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } + + opts = Object.assign({ + _source: null + }, opts) + + // Build ES query body: + const body = { + query: { + nested: { + path: 'items', + score_mode: 'avg', + query: { + constant_score: { + filter + } + } + } + } + } + if (opts._source) body._source = opts._source + + app.logger.debug('Resources#itemsByFilter', body) + return app.esClient.search(body) + .then((resp) => { + if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) + resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() + // Convert this ES bibs response into an array of flattened items: + return resp.hits.hits + .map((doc) => doc._source) + // Reduce to a flat array of items + .reduce((a, bib) => { + return a.concat(bib.items) + // Let's affix that bnum into the item's identifiers so we know where it came from: + .map((i) => { + return Object.assign(i, { identifier: 
[`urn:bnum:${bib.uri}`].concat(i.identifier) }) + }) + }, []) + }).then((items) => { + return items.filter((item) => { + return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 + }) + }) +} + module.exports = { esRangeValue, parseSearchParams, - nyplSourceAndId + nyplSourceAndId, + itemsByFilter } From d50ea4570c9e5e7a8513a31085e2207f88505f36 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:21:40 -0500 Subject: [PATCH 12/82] Move buildElasticQuery/Body to bodybuilder --- lib/elasticsearch/elastic-body-builder.js | 54 ++++++++++++++++++++- lib/resources.js | 59 ++++------------------- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 655ca1c9..77bcb210 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,8 @@ -const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS } = require('./config') +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') +const ApiRequest = require('../api-request') +const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') /** * Given a ES search body, returns same object modified to include the @@ -319,11 +321,59 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { return body } +/** + * Given GET params, returns a plainobject suitable for use in a ES query. 
+ * + * @param {object} params - A hash of request params including `filters`, + * `search_scope`, `q` + * + * @return {object} ES query object suitable to be POST'd to ES endpoint + */ +const buildElasticQuery = function (params) { + const request = ApiRequest.fromParams(params) + + const builder = ElasticQueryBuilder.forApiRequest(request) + return builder.query.toJson() +} + +/** + * Given GET params, returns a plainobject with `from`, `size`, `query`, + * `sort`, and any other params necessary to perform the ES query based + * on the GET params. + * + * @return {object} An object that can be posted directly to ES + */ +const buildElasticBody = function (params) { + const body = { + from: (params.per_page * (params.page - 1)), + size: params.per_page + } + + body.query = buildElasticQuery(params) + + // Apply sort: + let direction + let field + + if (params.sort === 'relevance') { + field = '_score' + direction = 'desc' + } else { + field = SORT_FIELDS[params.sort].field || params.sort + direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection + } + body.sort = [{ [field]: direction }, { uri: 'asc' }] + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, itemsFilterContext, recapStatuses, itemStatusFilterWithUnavailableRecapItems, - itemsQueryContext + itemsQueryContext, + buildElasticQuery, + buildElasticBody } diff --git a/lib/resources.js b/lib/resources.js index 82f9e112..fbe3dc78 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,13 +15,18 @@ const AvailableDeliveryLocationTypes = require('./available_delivery_location_ty const { parseParams } = require('../lib/util') -const ApiRequest = require('./api-request') -const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS, SORT_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS } = require('./elasticsearch/config') const errors = require('./errors') const { 
esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') -const { bodyForFindByUri, addInnerHits, itemsFilterContext, itemsQueryContext } = require('./elasticsearch/elastic-body-builder') +const { + bodyForFindByUri, + addInnerHits, + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody +} = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -359,49 +364,3 @@ module.exports = function (app, _private = null) { _private.mergeAggregationsResponses = mergeAggregationsResponses } } - -/** - * Given GET params, returns a plainobject with `from`, `size`, `query`, - * `sort`, and any other params necessary to perform the ES query based - * on the GET params. - * - * @return {object} An object that can be posted directly to ES - */ -const buildElasticBody = function (params) { - const body = { - from: (params.per_page * (params.page - 1)), - size: params.per_page - } - - body.query = buildElasticQuery(params) - - // Apply sort: - let direction - let field - - if (params.sort === 'relevance') { - field = '_score' - direction = 'desc' - } else { - field = SORT_FIELDS[params.sort].field || params.sort - direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection - } - body.sort = [{ [field]: direction }, { uri: 'asc' }] - - return body -} - -/** - * Given GET params, returns a plainobject suitable for use in a ES query. 
- * - * @param {object} params - A hash of request params including `filters`, - * `search_scope`, `q` - * - * @return {object} ES query object suitable to be POST'd to ES endpoint - */ -const buildElasticQuery = function (params) { - const request = ApiRequest.fromParams(params) - - const builder = ElasticQueryBuilder.forApiRequest(request) - return builder.query.toJson() -} From 2f6c8875cca34e19d2b31ecb27f3fda093a36baf Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:24:08 -0500 Subject: [PATCH 13/82] Factor out body for search --- lib/elasticsearch/elastic-body-builder.js | 16 +++++++++++++++- lib/resources.js | 14 ++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 77bcb210..e77ba2ff 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -367,6 +367,19 @@ const buildElasticBody = function (params) { return body } +const bodyForSearch = function (params) { + let body = buildElasticBody(params) + + // Strip unnecessary _source fields + body._source = { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -375,5 +388,6 @@ module.exports = { itemStatusFilterWithUnavailableRecapItems, itemsQueryContext, buildElasticQuery, - buildElasticBody + buildElasticBody, + bodyForSearch } diff --git a/lib/resources.js b/lib/resources.js index fbe3dc78..18646abb 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -15,7 +15,7 @@ const AvailableDeliveryLocationTypes = require('./available_delivery_location_ty const { parseParams } = require('../lib/util') -const { AGGREGATIONS_SPEC, EXCLUDE_FIELDS } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = 
require('./errors') const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') @@ -25,7 +25,8 @@ const { itemsFilterContext, itemsQueryContext, buildElasticQuery, - buildElasticBody + buildElasticBody, + bodyForSearch } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -163,14 +164,7 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) - let body = buildElasticBody(params) - - // Strip unnecessary _source fields - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + const body = bodyForSearch(params) app.logger.debug('Resources#search', RESOURCES_INDEX, body) From 1075d65e8dbbff84316dc071202d69887735265a Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:30:53 -0500 Subject: [PATCH 14/82] Move buildElasticAggregationsBody --- lib/elasticsearch/elastic-body-builder.js | 25 +++++++++++++++++++++-- lib/resources.js | 23 ++------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e77ba2ff..57f87389 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,4 +1,4 @@ -const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS } = require('./config') +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') const ApiRequest = require('../api-request') @@ -380,6 +380,26 @@ const bodyForSearch = function (params) { return body } +const buildElasticAggregationsBody = (params, aggregateProps) => { + // Add an `aggregations` entry to the ES body describing the aggretations + // we want. 
Set the `size` property to per_page (default 50) for each. + // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size + const aggregations = aggregateProps.reduce((aggs, prop) => { + aggs[prop] = AGGREGATIONS_SPEC[prop] + // Only set size for terms aggs for now: + if (aggs[prop].terms) { + aggs[prop].terms.size = params.per_page + } + return aggs + }, {}) + + const body = buildElasticBody(params) + body.size = 0 + body.aggregations = aggregations + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -389,5 +409,6 @@ module.exports = { itemsQueryContext, buildElasticQuery, buildElasticBody, - bodyForSearch + bodyForSearch, + buildElasticAggregationsBody } diff --git a/lib/resources.js b/lib/resources.js index 18646abb..2fdd8e96 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -26,7 +26,8 @@ const { itemsQueryContext, buildElasticQuery, buildElasticBody, - bodyForSearch + bodyForSearch, + buildElasticAggregationsBody } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -207,26 +208,6 @@ module.exports = function (app, _private = null) { }) } - const buildElasticAggregationsBody = (params, aggregateProps) => { - // Add an `aggregations` entry to the ES body describing the aggretations - // we want. Set the `size` property to per_page (default 50) for each. 
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size - const aggregations = aggregateProps.reduce((aggs, prop) => { - aggs[prop] = AGGREGATIONS_SPEC[prop] - // Only set size for terms aggs for now: - if (aggs[prop].terms) { - aggs[prop].terms.size = params.per_page - } - return aggs - }, {}) - - const body = buildElasticBody(params) - body.size = 0 - body.aggregations = aggregations - - return body - } - /** * Given a params hash, returns an array of ES queries for fetching relevant aggregations. */ From a0237b707472f9e902f8daa316fad2d011a2c7e4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:33:10 -0500 Subject: [PATCH 15/82] Move aggregationQueriesForParams to bodybuilder --- lib/elasticsearch/elastic-body-builder.js | 36 ++++++++++++++++++++++- lib/resources.js | 35 +--------------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 57f87389..42a69ae7 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -400,6 +400,39 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return body } +/** +* Given a params hash, returns an array of ES queries for fetching relevant aggregations. +*/ +const aggregationQueriesForParams = (params) => { + // Build the complete set of distinct aggregation queries we need to run + // depending on active filters. 
We want: + // - one agg representing the counts for all properties _not_ used in filter + // - one agg each for each property that is used in a filter, but counts should exclude that filter + + // Build the standard aggregation: + const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) + // Aggregate on all properties that aren't involved in filters: + .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) + const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] + + // Now append all property-specific aggregation queries (one for each + // distinct property used in a filter): + return queries.concat( + Object.entries(params.filters || {}) + // Only consider filters that are also aggregations: + .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) + .map(([prop, values]) => { + const aggFilters = structuredClone(params.filters) + // For this aggregation, don't filter on namesake property: + delete aggFilters[prop] + + // Build query for single aggregation: + const modifiedParams = Object.assign({}, params, { filters: aggFilters }) + return buildElasticAggregationsBody(modifiedParams, [prop]) + }) + ) +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -410,5 +443,6 @@ module.exports = { buildElasticQuery, buildElasticBody, bodyForSearch, - buildElasticAggregationsBody + buildElasticAggregationsBody, + aggregationQueriesForParams } diff --git a/lib/resources.js b/lib/resources.js index 2fdd8e96..e4ecf72a 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -27,7 +27,7 @@ const { buildElasticQuery, buildElasticBody, bodyForSearch, - buildElasticAggregationsBody + aggregationQueriesForParams } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -208,39 +208,6 @@ module.exports = function (app, _private = null) { }) } - /** - * Given a params hash, returns an array of ES queries for fetching relevant aggregations. 
- */ - const aggregationQueriesForParams = (params) => { - // Build the complete set of distinct aggregation queries we need to run - // depending on active filters. We want: - // - one agg representing the counts for all properties _not_ used in filter - // - one agg each for each property that is used in a filter, but counts should exclude that filter - - // Build the standard aggregation: - const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) - // Aggregate on all properties that aren't involved in filters: - .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) - const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] - - // Now append all property-specific aggregation queries (one for each - // distinct property used in a filter): - return queries.concat( - Object.entries(params.filters || {}) - // Only consider filters that are also aggregations: - .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) - .map(([prop, values]) => { - const aggFilters = structuredClone(params.filters) - // For this aggregation, don't filter on namesake property: - delete aggFilters[prop] - - // Build query for single aggregation: - const modifiedParams = Object.assign({}, params, { filters: aggFilters }) - return buildElasticAggregationsBody(modifiedParams, [prop]) - }) - ) - } - /** * Given an array of ES aggregations responses (such as that returned from msearch) **/ From 3340f5e4f13296e5545c66d1fbbe3b3930f8b903 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:34:51 -0500 Subject: [PATCH 16/82] Move mergeAggregationsResposes to utils --- lib/resources.js | 36 ++++++++--------------------------- lib/utils/resource-helpers.js | 30 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index e4ecf72a..5082fc1d 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -18,7 +18,14 @@ const { parseParams } = 
require('../lib/util') const { AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') -const { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter } = require('./utils/resource-helpers') +const { + esRangeValue, + parseSearchParams, + nyplSourceAndId, + itemsByFilter, + mergeAggregationsResponses +} = require('./utils/resource-helpers') + const { bodyForFindByUri, addInnerHits, @@ -208,33 +215,6 @@ module.exports = function (app, _private = null) { }) } - /** - * Given an array of ES aggregations responses (such as that returned from msearch) - **/ - const mergeAggregationsResponses = (responses) => { - // Filter out errored responses: - responses = responses.filter((resp) => resp.aggregations) - if (responses.length === 0) { - return {} - } - return { - // Use `hits` of last element, somewhat arbitrarily: - hits: responses[responses.length - 1].hits, - aggregations: responses - .reduce((allAggs, resp) => { - const respAggs = Object.entries(resp.aggregations) - // Build hash of response aggs, squashing _nested aggs: - .reduce((a, [field, _a]) => { - // If it's nested, it will be in our special '_nested' prop: - a[field] = _a._nested || _a - return a - }, {}) - // Add response aggs to combined aggs: - return Object.assign(allAggs, respAggs) - }, {}) - } - } - // Get all aggregations: app.resources.aggregations = async (params, opts) => { params = parseSearchParams(params) diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index e87d6e79..4b587a2d 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -115,9 +115,37 @@ function itemsByFilter (identifierValues, app) { }) } +/** +* Given an array of ES aggregations responses (such as that returned from msearch) +**/ +const mergeAggregationsResponses = (responses) => { + // Filter out errored responses: + responses = responses.filter((resp) => resp.aggregations) + if (responses.length === 0) { + return {} + } + return { + 
// Use `hits` of last element, somewhat arbitrarily: + hits: responses[responses.length - 1].hits, + aggregations: responses + .reduce((allAggs, resp) => { + const respAggs = Object.entries(resp.aggregations) + // Build hash of response aggs, squashing _nested aggs: + .reduce((a, [field, _a]) => { + // If it's nested, it will be in our special '_nested' prop: + a[field] = _a._nested || _a + return a + }, {}) + // Add response aggs to combined aggs: + return Object.assign(allAggs, respAggs) + }, {}) + } +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, - itemsByFilter + itemsByFilter, + mergeAggregationsResponses } From e072d9336a8f3438c2cfaa94e98bfea711831bc3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:37:30 -0500 Subject: [PATCH 17/82] Factor out body for aggregation --- lib/elasticsearch/elastic-body-builder.js | 20 +++++++++++++++++++- lib/resources.js | 18 ++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 42a69ae7..82688400 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -433,6 +433,23 @@ const aggregationQueriesForParams = (params) => { ) } +const bodyForAggregation = (params) => { + const body = buildElasticBody(params) + + // We're fetching aggs, so specify 0 resource results: + body.size = 0 + + body.aggregations = {} + body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] + + // If it's a terms agg, we can apply per_page: + if (body.aggregations[params.field].terms) { + body.aggregations[params.field].terms.size = params.per_page + } + + return body +} + module.exports = { bodyForFindByUri, addInnerHits, @@ -444,5 +461,6 @@ module.exports = { buildElasticBody, bodyForSearch, buildElasticAggregationsBody, - aggregationQueriesForParams + aggregationQueriesForParams, + bodyForAggregation } diff --git a/lib/resources.js 
b/lib/resources.js index 5082fc1d..da5048da 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -34,7 +34,8 @@ const { buildElasticQuery, buildElasticBody, bodyForSearch, - aggregationQueriesForParams + aggregationQueriesForParams, + bodyForAggregation } = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX @@ -244,25 +245,14 @@ module.exports = function (app, _private = null) { return Promise.reject(new Error('Invalid aggregation field')) } - const body = buildElasticBody(params) - - // We're fetching aggs, so specify 0 resource results: - body.size = 0 - - body.aggregations = {} - body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] - - // If it's a terms agg, we can apply per_page: - if (body.aggregations[params.field].terms) { - body.aggregations[params.field].terms.size = params.per_page - } - const serializationOpts = Object.assign(opts, { // This tells the serializer what fields are "packed" fields, which should be split apart packed_fields: ['materialType', 'language', 'carrierType', 'mediaType', 'issuance', 'status', 'owner'], root: true }) + const body = bodyForAggregation(params) + app.logger.debug('Resources#aggregation:', body) return app.esClient.search(body) .then((resp) => { From 054d1e4eac144ec81d0436e625a5b5a1caad9caf Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 15:58:27 -0500 Subject: [PATCH 18/82] Move findByUri to async/await --- lib/resources.js | 61 ++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index da5048da..61584191 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -72,39 +72,34 @@ module.exports = function (app, _private = null) { // We only need to retrieve scsb statuses if building item aggs or // filtering on status: const retrieveScsbStatuses = params.include_item_aggregations || params.item_status - const scsbStatusLookup = retrieveScsbStatuses 
- ? scsbClient.getBarcodesByStatusForBnum(params.uri) - .catch((e) => { - app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) - return {} - }) - : Promise.resolve({}) - - return scsbStatusLookup - .then(async (recapBarcodesByStatus) => { - const body = await bodyForFindByUri(recapBarcodesByStatus, params) - app.logger.debug('Resources#findByUri', body) - return app.esClient.search(body) - .then((resp) => { - // Mindfully throw errors for known issues: - if (!resp || !resp.hits) { - throw new Error('Error connecting to index') - } else if (resp?.hits?.total?.value === 0) { - throw new errors.NotFoundError(`Record not found: ${params.uri}`) - } else { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) - } - }).then((resp) => { - const hitsAndItemAggregations = resp.hits.hits[0]._source - hitsAndItemAggregations.itemAggregations = resp.aggregations - return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) - }) - }) + let recapBarcodesByStatus = {} + if (retrieveScsbStatuses) { + try { + recapBarcodesByStatus = await scsbClient.getBarcodesByStatusForBnum(params.uri) + } catch (e) { + app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) + } + } + + const body = await bodyForFindByUri(recapBarcodesByStatus, params) + app.logger.debug('Resources#findByUri', body) + let resp = await app.esClient.search(body) + // Mindfully throw errors for known issues: + if (!resp || !resp.hits) { + throw new Error('Error connecting to index') + } else if (resp?.hits?.total?.value === 0) { + throw new errors.NotFoundError(`Record not found: ${params.uri}`) + } else { + const massagedResponse = new ResponseMassager(resp) + try { + resp = await 
massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) + } catch (e) { + // If error hitting HTC, just return response un-modified: + } + const hitsAndItemAggregations = resp.hits.hits[0]._source + hitsAndItemAggregations.itemAggregations = resp.aggregations + return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) + } } // Get a single raw annotated-marc resource: From 2d60b153315198d9dd2627e603332789c260d051 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 16:01:04 -0500 Subject: [PATCH 19/82] Make annotatedMarc async --- lib/resources.js | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 61584191..1a4a0f4d 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -110,16 +110,13 @@ module.exports = function (app, _private = null) { app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) - return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) - .then((resp) => { - // need to check that the query actually found an entry - if (!resp.data) { - throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) - } else { - return resp.data - } - }) - .then(AnnotatedMarcSerializer.serialize) + const resp = await makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) + // need to check that the query actually found an entry + if (!resp.data) { + throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) + } + + return await AnnotatedMarcSerializer.serialize(resp.data) } // Get deliveryLocations for given resource(s) From 2ec818a28c77159d25a9e6c2876dae4bd4abad9f Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 9 Dec 2025 16:17:31 -0500 Subject: [PATCH 20/82] Make deliveryLocationsByBarcode async --- lib/resources.js | 46 +++++++++++++++-------------------- lib/utils/resource-helpers.js | 13 +++++++++- 2 files changed, 32 
insertions(+), 27 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 1a4a0f4d..35f5fda1 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -11,7 +11,6 @@ const { IndexSearchError, IndexConnectionError } = require('./errors') const ResponseMassager = require('./response_massager.js') const DeliveryLocationsResolver = require('./delivery-locations-resolver') -const AvailableDeliveryLocationTypes = require('./available_delivery_location_types') const { parseParams } = require('../lib/util') @@ -23,7 +22,8 @@ const { parseSearchParams, nyplSourceAndId, itemsByFilter, - mergeAggregationsResponses + mergeAggregationsResponses, + lookupPatronType } = require('./utils/resource-helpers') const { @@ -120,7 +120,7 @@ module.exports = function (app, _private = null) { } // Get deliveryLocations for given resource(s) - app.resources.deliveryLocationsByBarcode = function (params, opts) { + app.resources.deliveryLocationsByBarcode = async function (params, opts) { params = parseParams(params, { barcodes: { type: 'string', repeatable: true }, patronId: { type: 'string' } @@ -129,33 +129,27 @@ module.exports = function (app, _private = null) { const identifierValues = barcodes.map((barcode) => `urn:barcode:${barcode}`) - // Create promise to resolve deliveryLocationTypes by patron type: - const lookupPatronType = AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) - .catch((e) => { - throw new errors.InvalidParameterError('Invalid patronId') - }) - // Create promise to resolve items: const fetchItems = itemsByFilter(identifierValues, app) // Run both item fetch and patron fetch in parallel: - return Promise.all([fetchItems, lookupPatronType]) - .then((resp) => { - // The resolved values of Promise.all are strictly ordered based on original array of promises - const items = resp[0] - const scholarRoom = resp[1] - - // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: - return 
DeliveryLocationsResolver.attachDeliveryLocationsAndEddRequestability(items, scholarRoom) - .catch((e) => { - // An error here is likely an HTC API outage - // Let's return items unmodified: - // - app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) - return items - }) - }) - .then((items) => ItemResultsSerializer.serialize(items, opts)) + const [resp] = Promise.all([fetchItems, lookupPatronType]) + // The resolved values of Promise.all are strictly ordered based on original array of promises + let items = resp[0] + const scholarRoom = resp[1] + + // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: + try { + items = await DeliveryLocationsResolver.attachDeliveryLocationsAndEddRequestability(items, scholarRoom) + } catch (e) { + // An error here is likely an HTC API outage + // Let's return items unmodified: + // + app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) + return items + } + items = await ItemResultsSerializer.serialize(items, opts) + return items } // Conduct a search across resources: diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 4b587a2d..52e68b45 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -3,6 +3,7 @@ const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const errors = require('../errors') const LocationLabelUpdater = require('../location_label_updater') +const AvailableDeliveryLocationTypes = require('../available_delivery_location_types') /** * Given a range represented as an array, returns a corresponding ES range object @@ -142,10 +143,20 @@ const mergeAggregationsResponses = (responses) => { } } +// Create promise to resolve deliveryLocationTypes by patron type: +const lookupPatronType = 
async function (params) { + try { + await AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) + } catch (e) { + throw new errors.InvalidParameterError('Invalid patronId') + } +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter, - mergeAggregationsResponses + mergeAggregationsResponses, + lookupPatronType } From ba7fcf5a973ba89a4fc4089d1236f50a79cb63f6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:22:16 -0500 Subject: [PATCH 21/82] Pull search from promise chain in search --- lib/resources.js | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 35f5fda1..fa26faeb 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -153,7 +153,7 @@ module.exports = function (app, _private = null) { } // Conduct a search across resources: - app.resources.search = function (params, opts, request) { + app.resources.search = async function (params, opts, request) { app.logger.debug('Unparsed params: ', params) params = parseSearchParams(params) @@ -163,7 +163,19 @@ module.exports = function (app, _private = null) { app.logger.debug('Resources#search', RESOURCES_INDEX, body) - return app.esClient.search(body) + let resp + + try { + resp = await app.esClient.search(body) + } catch (e) { + // Wrap ES client errors or any downstream error + if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { + throw e // already a custom error + } + throw new IndexSearchError(`Error processing search: ${e.message || e}`) + } + + return Promise.resolve(resp) .then((resp) => { const massagedResponse = new ResponseMassager(resp) return massagedResponse.massagedResponse(request) @@ -193,13 +205,6 @@ module.exports = function (app, _private = null) { return resp }) }) - .catch((e) => { - // Wrap ES client errors or any downstream error - if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { - throw e // already 
a custom error - } - throw new IndexSearchError(`Error processing search: ${e.message || e}`) - }) } // Get all aggregations: From 9189c1b7df2c837ac9373ba5e7b618aa86cc2f00 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:24:34 -0500 Subject: [PATCH 22/82] Pull massaged response from promise chain in resources#search --- lib/resources.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index fa26faeb..3b8aaf06 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -175,14 +175,22 @@ module.exports = function (app, _private = null) { throw new IndexSearchError(`Error processing search: ${e.message || e}`) } + try { + const massagedResponse = new ResponseMassager(resp) + resp = await massagedResponse.massagedResponse(request) + } catch (e) { + + } + return Promise.resolve(resp) .then((resp) => { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) + // const massagedResponse = new ResponseMassager(resp) + // return massagedResponse.massagedResponse(request) + // .catch((e) => { + // // If error hitting HTC, just return response un-modified: + // return resp + // }) + return Promise.resolve(resp) .then((updatedResponse) => ResourceResultsSerializer.serialize(updatedResponse, opts)) .then((resp) => { // Build relevance report (for debugging): From 3297b630c6278a8e4fe9815c2302dcb258dce018 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:25:43 -0500 Subject: [PATCH 23/82] Pull ResourceResultsSerializer.serialize from promise chain in search --- lib/resources.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/resources.js b/lib/resources.js index 3b8aaf06..f10da605 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -182,6 +182,8 @@ module.exports = function (app, _private = null) { 
} + resp = await ResourceResultsSerializer.serialize(resp, opts) + return Promise.resolve(resp) .then((resp) => { // const massagedResponse = new ResponseMassager(resp) @@ -191,7 +193,7 @@ module.exports = function (app, _private = null) { // return resp // }) return Promise.resolve(resp) - .then((updatedResponse) => ResourceResultsSerializer.serialize(updatedResponse, opts)) + .then((resp) => { // Build relevance report (for debugging): const relevanceReport = resp.itemListElement From 03d57dcdb88d53a48a7e4759970f12887f4eb030 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:28:06 -0500 Subject: [PATCH 24/82] Remove nested promise --- lib/resources.js | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index f10da605..eb5c8aff 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -183,37 +183,26 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - return Promise.resolve(resp) .then((resp) => { - // const massagedResponse = new ResponseMassager(resp) - // return massagedResponse.massagedResponse(request) - // .catch((e) => { - // // If error hitting HTC, just return response un-modified: - // return resp - // }) - return Promise.resolve(resp) - - .then((resp) => { - // Build relevance report (for debugging): - const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) - 
app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) - - resp.debug = { - relevanceReport, - query: body - } - return resp + // Build relevance report (for debugging): + const relevanceReport = resp.itemListElement + .map((r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') }) + app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) + + resp.debug = { + relevanceReport, + query: body + } + return resp }) } From 1c344696b011abe454032e5215b2b96c63b678da Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:30:24 -0500 Subject: [PATCH 25/82] Remove promise from search --- lib/resources.js | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index eb5c8aff..6dbbb5ad 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -183,27 +183,24 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - return Promise.resolve(resp) - .then((resp) => { - // Build relevance report (for debugging): - const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} 
(displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) - app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) - - resp.debug = { - relevanceReport, - query: body - } - return resp + + const relevanceReport = resp.itemListElement + .map((r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') }) + app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) + + resp.debug = { + relevanceReport, + query: body + } + return resp } // Get all aggregations: From 6e8af8df660e45689bd75793121b858ab089f358 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:39:38 -0500 Subject: [PATCH 26/82] Factor out relevance report --- lib/resources.js | 15 ++++----------- lib/utils/resource-helpers.js | 13 ++++++++++++- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 6dbbb5ad..b112f13a 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -23,7 +23,8 @@ const { nyplSourceAndId, itemsByFilter, mergeAggregationsResponses, - lookupPatronType + lookupPatronType, + makeRelevanceReport } = require('./utils/resource-helpers') const { @@ -183,17 +184,9 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - + const relevanceReport = resp.itemListElement - .map((r, ind) => { - const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 
'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) + .map(makeRelevanceReport(params)) app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) resp.debug = { diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 52e68b45..9ba7ea9d 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -152,11 +152,22 @@ const lookupPatronType = async function (params) { } } +const makeRelevanceReport = (params) => (r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') +} + module.exports = { esRangeValue, parseSearchParams, nyplSourceAndId, itemsByFilter, mergeAggregationsResponses, - lookupPatronType + lookupPatronType, + makeRelevanceReport } From cf7edcdc0bf90907d00c9eb2d7da4db741a4de1d Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 11 Dec 2025 12:45:03 -0500 Subject: [PATCH 27/82] Make aggregation endpoint async --- lib/resources.js | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index b112f13a..062a0a28 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -217,7 +217,7 @@ module.exports = function (app, _private = null) { } // Get a single 
aggregation: - app.resources.aggregation = (params, opts) => { + app.resources.aggregation = async (params, opts) => { params = parseSearchParams(params, { per_page: { type: 'int', default: 50, range: [0, 1000] } }) @@ -234,13 +234,11 @@ module.exports = function (app, _private = null) { const body = bodyForAggregation(params) app.logger.debug('Resources#aggregation:', body) - return app.esClient.search(body) - .then((resp) => { - // If it's nested, it will be in our special '_nested' prop: - resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] - resp.id = params.field - return AggregationSerializer.serialize(resp, serializationOpts) - }) + + let resp = await app.esClient.search(body) + resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] + resp.id = params.field + return AggregationSerializer.serialize(resp, serializationOpts) } // For unit testing, export private methods if second arg given: From 6ca7e33ecf84dc04a44b5a2d5c4237523a77cbe9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 12:17:15 -0500 Subject: [PATCH 28/82] Exclude parentheses in query term --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index be8cceca..c2b32d06 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -9,7 +9,7 @@ const cql = ` relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" key ::= NON_WS_KEY | '"' KEYPHRASE '"' KEYPHRASE ::= [^"]+ - NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ + NON_WS_KEY ::= [^#x20#x09#x0A#x0D"()]+ ` const cqlParser = new Grammars.W3C.Parser(cql) From 39c09204a786813101e75350fc338dc38e510b63 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 12:35:28 -0500 Subject: [PATCH 29/82] Make keyphrase/non_ws_key lowercase --- lib/elasticsearch/cql_grammar.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index c2b32d06..17844b43 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -7,9 +7,9 @@ const cql = ` atomic_query ::= scope " " relation " " key | key scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= NON_WS_KEY | '"' KEYPHRASE '"' - KEYPHRASE ::= [^"]+ - NON_WS_KEY ::= [^#x20#x09#x0A#x0D"()]+ + key ::= non_ws_key | '"' keyphrase '"' + keyphrase ::= [^"]+ + non_ws_key ::= [^#x20#x09#x0A#x0D"()]+ ` const cqlParser = new Grammars.W3C.Parser(cql) From 6714e246c4b2f5a9e8bac3dcdb242d739187c689 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 13:08:04 -0500 Subject: [PATCH 30/82] Change callNumber to callnumber to enable callnumber searches --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 17844b43..3cf0b63c 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -5,7 +5,7 @@ const cql = ` connective ::= "and" | "or" sub_query ::= atomic_query | "(" query ")" atomic_query ::= scope " " relation " " key | key - scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" + scope ::= "title" | "contributor" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" key ::= non_ws_key | '"' keyphrase '"' keyphrase ::= [^"]+ From cd4c24f249e6e78e9576b7f73b64b5eb37b430a4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 7 Jan 2026 13:10:23 -0500 Subject: [PATCH 31/82] Add finding text by key for atomic queries --- lib/elasticsearch/cql_query_builder.js | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index a26c7e38..f1f2d8c4 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -32,12 +32,12 @@ function buildEsQueryFromTree (tree) { if (tree.children.length > 1) { scope = tree.children[0].text relation = tree.children[1].text - term = tree.children[2].text } else { scope = 'all' relation = 'any' - term = tree.children[0].text } + term = tree.children.find(child => child.type === 'key').children[0].text + return buildAtomic(scope, relation, term) } default: From eefeb444be4f8e7fefc0fdf1c4648713f67b17f9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:23:56 -0500 Subject: [PATCH 32/82] Reorganize addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 300 ++---------------- lib/elasticsearch/elastic-query-builder.js | 8 +- .../elastic-query-filter-builder.js | 257 +++++++++++++++ lib/elasticsearch/elastic-query.js | 8 +- test/resources.test.js | 9 +- 5 files changed, 299 insertions(+), 283 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 82688400..f2d357f4 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,7 @@ const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') const { deepValue } = require('../util') const { esRangeValue } = require('../utils/resource-helpers') +const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic-query-filter-builder') const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') @@ -13,263 +14,15 @@ const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') * for limiting and paginating through items */ const addInnerHits = (body, _options = {}) => { 
- const options = Object.assign({ - size: process.env.SEARCH_ITEMS_SIZE || 200, - from: 0, - merge_checkin_card_items: true - }, _options) + const wrappedItemsQuery = innerHits(_options) - // Make sure necessary structure exists: - if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { - body.query = { bool: {} } - } - - // The place to add the filter depends on the query built to this point: - const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) - // Initialize filter object if it doesn't already exist: - placeToAddFilter.filter = placeToAddFilter.filter || [] - // If filter object already exists, convert it to array: - if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] + const placeToAddFilter = body.query.bool - const itemsQuery = { - bool: Object.assign( - itemsQueryContext(options), - itemsFilterContext(options) - ) - } - - const wrappedItemsQuery = { - bool: { - should: [ - { - nested: { - path: 'items', - query: itemsQuery, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: options.size, - from: options.from, - name: 'items' - } - } - }, - // Add a catch-all to ensure we return the bib document even when - // numItems=0 or applied item filters exclude all items: - { match_all: {} } - ] - } - } placeToAddFilter.filter.push(wrappedItemsQuery) - // If there is any item query at all, run an additional inner_hits query - // to retrieve the total number of items without filtering: - if (itemsQuery.bool.filter) { - wrappedItemsQuery.bool.should.push({ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }) - } - return body } -/** - * Given an object containing filters, - * returns content of the ES query filter context - * - * @param {object} options - An object with keys,value pairs of the form 
[filter_name]:[filter_value] - * @returns {object} - */ -const itemsFilterContext = (options) => { - if (!options.query) return {} - - const filterHandlers = { - volume: (volumes) => { - return { - range: { - 'items.volumeRange': esRangeValue(volumes) - } - } - }, - date: (dates) => { - return { - range: { - 'items.dateRange': esRangeValue(dates) - } - } - }, - format: (formats) => { - return { - terms: { - 'items.formatLiteral': formats - } - } - }, - location: (locations) => { - return { - terms: { - 'items.holdingLocation.id': locations - } - } - }, - status: (statuses) => { - // Determine if all possible ReCAP statuses were selected: - const selectedRecapStatuses = recapStatuses(statuses) - - if (selectedRecapStatuses.length === 1 && - Array.isArray(options.unavailable_recap_barcodes) && - options.unavailable_recap_barcodes.length > 0) { - // There are known unavailable ReCAP items, so build a complicated - // filter clause with appropriate barcode overrides: - return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) - } else { - // If there are no known unavailable ReCAP items, just do a straight - // status match: - return { - terms: { - 'items.status.id': statuses - } - } - } - }, - itemUri: (uri) => { - return { term: { 'items.uri': uri } } - } - } - - const filters = Object.keys(options.query).map((filter) => { - const value = options.query[filter] - const handler = filterHandlers[filter] - return value && handler ? handler(value) : null - }).filter((x) => x) - - return filters.length - ? { filter: filters } - : {} -} - -/** - * Given an array of status ids (e.g. 
"status:a", "status:na") returns the - * subset of statuses that are relevant in ReCAP - */ -const recapStatuses = (statuses) => { - return statuses - .filter((status) => ['status:a', 'status:na'].includes(status)) -} - -/** - * Builds a big complicated ES filter to allow us to filter items by status, - * but override the indexed status for ReCAP items with statuses retrieved - * from SCSB. This corrects for the fact that ReCAP item statuses tend to be - * wrong in the ES index: - * - partner items are indexed as Available and remain thus forever - * - NYPL item statuses _should_ equal SCSB status, but the mechanism - * for keeping them synced isn't perfect and operates on a delay - * - * @param {string[]} statuses - An array of statuses to filter on - * @param {string[]} unavailableRecapBarcodes - An array of item barcodes - * known to be unavailble - * - * Returns an ES filter that matches the desired statuses, but also uses - * the known unavailable items to override indexed item statuses for ReCAP - * items (because ReCAP is the authority for status of off-site items). 
- * Essentially, the criteria is for matching an item is: - * - * - if on-site (non-ReCAP): - * - has a matching indexed status - * - if off-site: - * - if filtering on status:na - * - item barcode must be in unavailableRecapBarcodes - * - if filtering on status:a: - * - item barcode must NOT be in unavailableRecapBarcodes - */ -const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { - // First, let's set up some common clauses: - - // Item is in ReCAP: - const itemIsRecapClause = { - regexp: { 'items.holdingLocation.id': 'loc:rc.*' } - } - // Item's indexed status matches one of the filtered statuses: - const itemHasIndexedStatusClause = { - terms: { 'items.status.id': statuses } - } - // Item is marked Unavailable in SCSB: - const itemIsUnavailableInRecapClause = { - script: { - script: { - inline: 'doc[\'items.idBarcode\'].value == null || ' + - 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', - lang: 'painless', - params: { unavailableRecapBarcodes } - } - } - } - // This function is only called if `statuses` param contains a single - // ReCAP-relevant status (i.e. status:a or status:na), so determine which - // ReCAP status to use: - const selectedRecapStatus = recapStatuses(statuses).shift() - // Item's ReCAP status agrees with filter: - const itemRecapStatusAgreesWithFilterClause = - selectedRecapStatus === 'status:na' - ? itemIsUnavailableInRecapClause - : { bool: { must_not: itemIsUnavailableInRecapClause } } - - return { - bool: { - should: [ - // Either 1) item is on-site and has correctly indexed status: - { - bool: { - must: [ - // Item is on-site (i.e. not recap): - { bool: { must_not: itemIsRecapClause } }, - // Item indexed status matches filter: - itemHasIndexedStatusClause - ] - } - }, - // Or 2) item is off-site and has a scsb status that agrees with the - // filter (e.g. 
if filtering on status:na, scsb marks the barcode as - // 'Not Available') - { - bool: { - must: [ - // Item is off-site: - JSON.parse(JSON.stringify(itemIsRecapClause)), - // Item is not marked unavailable - itemRecapStatusAgreesWithFilterClause - ] - } - } - ] - } - } -} - -/** - * Given an object containing query options, - * returns content of the ES query context - * - * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one - * that matters right now - * @returns {object} - */ -const itemsQueryContext = (options) => { - const excludeClauses = [] - - if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) - - return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } -} - const bodyForFindByUri = async function (recapBarcodesByStatus, params) { // Establish base query: let body = { @@ -285,7 +38,8 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { uri: params.uri } } - ] + ], + filter: [] } } } @@ -310,6 +64,7 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { }, unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } + body.filter = [] body = addInnerHits(body, itemsOptions) body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -329,10 +84,10 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { * * @return {object} ES query object suitable to be POST'd to ES endpoint */ -const buildElasticQuery = function (params) { +const buildElasticQuery = function (params, options = {}) { const request = ApiRequest.fromParams(params) - const builder = ElasticQueryBuilder.forApiRequest(request) + const builder = ElasticQueryBuilder.forApiRequest(request, options) return builder.query.toJson() } @@ -343,13 +98,13 @@ const buildElasticQuery = function (params) { * * @return {object} An object that can be posted directly to ES */ -const 
buildElasticBody = function (params) { +const buildElasticBody = function (params, options = {}) { const body = { from: (params.per_page * (params.page - 1)), size: params.per_page } - body.query = buildElasticQuery(params) + body.query = buildElasticQuery(params, options) // Apply sort: let direction @@ -368,7 +123,7 @@ const buildElasticBody = function (params) { } const bodyForSearch = function (params) { - let body = buildElasticBody(params) + let body = buildElasticBody(params, { items: true }) // Strip unnecessary _source fields body._source = { @@ -393,11 +148,10 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return aggs }, {}) - const body = buildElasticBody(params) - body.size = 0 - body.aggregations = aggregations - - return body + return Object.assign( + buildElasticBody(params), + { size: 0, aggregations: aggregations } + ) } /** @@ -434,19 +188,23 @@ const aggregationQueriesForParams = (params) => { } const bodyForAggregation = (params) => { - const body = buildElasticBody(params) - - // We're fetching aggs, so specify 0 resource results: - body.size = 0 - - body.aggregations = {} - body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] + const aggregations = {} + aggregations[params.field] = AGGREGATIONS_SPEC[params.field] // If it's a terms agg, we can apply per_page: - if (body.aggregations[params.field].terms) { - body.aggregations[params.field].terms.size = params.per_page + if (aggregations[params.field].terms) { + aggregations[params.field].terms.size = params.per_page } + const body = Object.assign( + buildElasticBody(params), + { + size: 0, + aggregations: aggregations + } + ) + + return body } @@ -454,8 +212,6 @@ module.exports = { bodyForFindByUri, addInnerHits, itemsFilterContext, - recapStatuses, - itemStatusFilterWithUnavailableRecapItems, itemsQueryContext, buildElasticQuery, buildElasticBody, diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 
d561795a..97fe7018 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -11,9 +11,9 @@ const POPULARITY_BOOSTS = [ ] class ElasticQueryBuilder { - constructor (apiRequest) { + constructor (apiRequest, options = {}) { this.request = apiRequest - this.query = new ElasticQuery() + this.query = new ElasticQuery(options) // Break on search_scope: switch (this.request.params.search_scope) { @@ -706,8 +706,8 @@ class ElasticQueryBuilder { /** * Create a ElasticQueryBuilder for given ApiRequest instance */ - static forApiRequest (request) { - return new ElasticQueryBuilder(request) + static forApiRequest (request, options = {}) { + return new ElasticQueryBuilder(request, options) } } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index e69de29b..a7d3e0bb 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -0,0 +1,257 @@ +const { esRangeValue } = require('../utils/resource-helpers') + +/** + * Given an object containing filters, + * returns content of the ES query filter context + * + * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] + * @returns {object} + */ +const itemsFilterContext = (options) => { + if (!options.query) return {} + + const filterHandlers = { + volume: (volumes) => { + return { + range: { + 'items.volumeRange': esRangeValue(volumes) + } + } + }, + date: (dates) => { + return { + range: { + 'items.dateRange': esRangeValue(dates) + } + } + }, + format: (formats) => { + return { + terms: { + 'items.formatLiteral': formats + } + } + }, + location: (locations) => { + return { + terms: { + 'items.holdingLocation.id': locations + } + } + }, + status: (statuses) => { + // Determine if all possible ReCAP statuses were selected: + const selectedRecapStatuses = recapStatuses(statuses) + + if (selectedRecapStatuses.length === 
1 && + Array.isArray(options.unavailable_recap_barcodes) && + options.unavailable_recap_barcodes.length > 0) { + // There are known unavailable ReCAP items, so build a complicated + // filter clause with appropriate barcode overrides: + return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) + } else { + // If there are no known unavailable ReCAP items, just do a straight + // status match: + return { + terms: { + 'items.status.id': statuses + } + } + } + }, + itemUri: (uri) => { + return { term: { 'items.uri': uri } } + } + } + + const filters = Object.keys(options.query).map((filter) => { + const value = options.query[filter] + const handler = filterHandlers[filter] + return value && handler ? handler(value) : null + }).filter((x) => x) + + return filters.length + ? { filter: filters } + : {} +} + +/** + * Given an array of status ids (e.g. "status:a", "status:na") returns the + * subset of statuses that are relevant in ReCAP + */ +const recapStatuses = (statuses) => { + return statuses + .filter((status) => ['status:a', 'status:na'].includes(status)) +} + +/** + * Builds a big complicated ES filter to allow us to filter items by status, + * but override the indexed status for ReCAP items with statuses retrieved + * from SCSB. This corrects for the fact that ReCAP item statuses tend to be + * wrong in the ES index: + * - partner items are indexed as Available and remain thus forever + * - NYPL item statuses _should_ equal SCSB status, but the mechanism + * for keeping them synced isn't perfect and operates on a delay + * + * @param {string[]} statuses - An array of statuses to filter on + * @param {string[]} unavailableRecapBarcodes - An array of item barcodes + * known to be unavailble + * + * Returns an ES filter that matches the desired statuses, but also uses + * the known unavailable items to override indexed item statuses for ReCAP + * items (because ReCAP is the authority for status of off-site items). 
+ * Essentially, the criteria is for matching an item is: + * + * - if on-site (non-ReCAP): + * - has a matching indexed status + * - if off-site: + * - if filtering on status:na + * - item barcode must be in unavailableRecapBarcodes + * - if filtering on status:a: + * - item barcode must NOT be in unavailableRecapBarcodes + */ +const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { + // First, let's set up some common clauses: + + // Item is in ReCAP: + const itemIsRecapClause = { + regexp: { 'items.holdingLocation.id': 'loc:rc.*' } + } + // Item's indexed status matches one of the filtered statuses: + const itemHasIndexedStatusClause = { + terms: { 'items.status.id': statuses } + } + // Item is marked Unavailable in SCSB: + const itemIsUnavailableInRecapClause = { + script: { + script: { + inline: 'doc[\'items.idBarcode\'].value == null || ' + + 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', + lang: 'painless', + params: { unavailableRecapBarcodes } + } + } + } + // This function is only called if `statuses` param contains a single + // ReCAP-relevant status (i.e. status:a or status:na), so determine which + // ReCAP status to use: + const selectedRecapStatus = recapStatuses(statuses).shift() + // Item's ReCAP status agrees with filter: + const itemRecapStatusAgreesWithFilterClause = + selectedRecapStatus === 'status:na' + ? itemIsUnavailableInRecapClause + : { bool: { must_not: itemIsUnavailableInRecapClause } } + + return { + bool: { + should: [ + // Either 1) item is on-site and has correctly indexed status: + { + bool: { + must: [ + // Item is on-site (i.e. not recap): + { bool: { must_not: itemIsRecapClause } }, + // Item indexed status matches filter: + itemHasIndexedStatusClause + ] + } + }, + // Or 2) item is off-site and has a scsb status that agrees with the + // filter (e.g. 
if filtering on status:na, scsb marks the barcode as + // 'Not Available') + { + bool: { + must: [ + // Item is off-site: + JSON.parse(JSON.stringify(itemIsRecapClause)), + // Item is not marked unavailable + itemRecapStatusAgreesWithFilterClause + ] + } + } + ] + } + } +} + + +/** + * Given an object containing query options, + * returns content of the ES query context + * + * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one + * that matters right now + * @returns {object} + */ +const itemsQueryContext = (options) => { + const excludeClauses = [] + + if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) + + return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } +} + +const innerHits = (_options = {}) => { + const options = Object.assign({ + size: process.env.SEARCH_ITEMS_SIZE || 200, + from: 0, + merge_checkin_card_items: true + }, _options) + + // const placeToAddFilter = body.query.bool + + + // If there is any item query at all, run an additional inner_hits query + // to retrieve the total number of items without filtering: + const itemsQuery = { + bool: Object.assign( + itemsQueryContext(options), + itemsFilterContext(options) + ) + } + + const allItemsQuery = itemsQuery.bool.filter ? 
+ [{ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }] : + [] + + const wrappedItemsQuery = { + bool: { + should: [ + { + nested: { + path: 'items', + query: itemsQuery, + inner_hits: { + sort: [{ 'items.enumerationChronology_sort': 'desc' }], + size: options.size, + from: options.from, + name: 'items' + } + } + }, + // Add a catch-all to ensure we return the bib document even when + // numItems=0 or applied item filters exclude all items: + { match_all: {} }, + ...allItemsQuery + ] + } + } + + return wrappedItemsQuery +} + +module.exports = { + innerHits, + itemsQueryContext, + itemsFilterContext +} diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index aa2d2f1e..82edb0f6 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -7,10 +7,11 @@ **/ class ElasticQuery { - constructor () { + constructor (options = {}) { this.musts = [] this.shoulds = [] this.filters = [] + this.options = options } addMust (clause) { @@ -42,7 +43,8 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - if (!this.musts.length && !this.shoulds.length && !this.filters.length) { + console.log('options: ', this.options) + if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { return { match_all: {} } @@ -56,7 +58,7 @@ class ElasticQuery { if (this.shoulds.length) { result.bool.should = this.shoulds } - if (this.filters.length) { + if (this.filters.length || this.options.items) { result.bool.filter = this.filters } diff --git a/test/resources.test.js b/test/resources.test.js index 056a2926..c91b5d37 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -466,7 +466,8 @@ describe('Resources query', function () { size: 1, query: { bool: { - must: [{ term: { uri: 'b1234' } }] + must: [{ term: { uri: 'b1234' } }], + filter: [] } }, 
aggregations: { @@ -724,7 +725,7 @@ describe('Resources query', function () { describe('addInnerHits', () => { it('should include query for items', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2 })) + expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2 })) .to.deep.equal({ query: { bool: { @@ -761,7 +762,7 @@ describe('Resources query', function () { }) it('should exclude check in card items if explicitly set', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2, merge_checkin_card_items: false })) + expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2, merge_checkin_card_items: false })) .to.deep.equal({ query: { bool: { @@ -799,7 +800,7 @@ describe('Resources query', function () { it('should include filters for items', () => { expect(resourcesPrivMethods.addInnerHits( - { query: { bool: {} } }, + { query: { bool: { filter: [] } } }, { size: 1, from: 2, query: { volume: [1, 2], location: ['SASB', 'LPA'], other: 'filter' } } )).to.deep.equal({ query: { From d26553b4749c31ae07a24cf7df60f0e3cbc2f6bb Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:43:32 -0500 Subject: [PATCH 33/82] Reorganize bodyForFindByUri except for innerHits --- lib/elasticsearch/elastic-body-builder.js | 63 +++++++++++++---------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f2d357f4..5f236f4b 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -24,10 +24,37 @@ const addInnerHits = (body, _options = {}) => { } const bodyForFindByUri = async function (recapBarcodesByStatus, params) { + const paramsIncludesItemLevelFiltering = Object.keys(params) + .filter((param) => param.startsWith('item_')).length > 0 + + const returnAllItems = 
params.all_items && !paramsIncludesItemLevelFiltering + + const excludes = returnAllItems ? EXCLUDE_FIELDS.filter((field) => field !== '*_sort') : EXCLUDE_FIELDS.concat(['items']) + + + const aggregations = params.include_item_aggregations ? + { aggregations: ITEM_FILTER_AGGREGATIONS } : + {} + + const itemsOptions = { + size: params.items_size, + from: params.items_from, + merge_checkin_card_items: params.merge_checkin_card_items, + query: { + volume: params.item_volume, + date: params.item_date, + format: params.item_format, + location: params.item_location, + status: params.item_status, + itemUri: params.itemUri + }, + unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] + } + // Establish base query: let body = { _source: { - excludes: EXCLUDE_FIELDS + excludes: excludes }, size: 1, query: { @@ -41,38 +68,22 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { ], filter: [] } - } + }, + ...aggregations } - const paramsIncludesItemLevelFiltering = Object.keys(params) - .filter((param) => param.startsWith('item_')).length > 0 - const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + if (returnAllItems) { - body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') + // body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') } else { // No specific item requested, so add pagination and matching params: - const itemsOptions = { - size: params.items_size, - from: params.items_from, - merge_checkin_card_items: params.merge_checkin_card_items, - query: { - volume: params.item_volume, - date: params.item_date, - format: params.item_format, - location: params.item_location, - status: params.item_status, - itemUri: params.itemUri - }, - unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] - } + body.filter = [] body = addInnerHits(body, itemsOptions) - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - } - if 
(params.include_item_aggregations) { - body.aggregations = ITEM_FILTER_AGGREGATIONS + // body._source = { + // excludes: EXCLUDE_FIELDS.concat(['items']) + // } } + return body } From 7429ad6ed44fcf83ce1e18a644d4ad08aa3f3bb0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 15:54:31 -0500 Subject: [PATCH 34/82] Remove addInnerHits from findByUri --- lib/elasticsearch/elastic-body-builder.js | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 5f236f4b..077ae8f2 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -51,6 +51,10 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } + const filter = returnAllItems ? {} : { filter: []} + + const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } + // Establish base query: let body = { _source: { @@ -66,24 +70,13 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { } } ], - filter: [] + ...queryFilter } }, + ...filter, ...aggregations } - if (returnAllItems) { - // body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') - } else { - // No specific item requested, so add pagination and matching params: - - body.filter = [] - body = addInnerHits(body, itemsOptions) - // body._source = { - // excludes: EXCLUDE_FIELDS.concat(['items']) - // } - } - return body } From b386ec1dc17c68b6d808a8df6af68126ef39bbc0 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 9 Jan 2026 16:11:16 -0500 Subject: [PATCH 35/82] Reorganize buildElasticBody --- lib/elasticsearch/elastic-body-builder.js | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 
077ae8f2..f285f0f2 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -103,13 +103,6 @@ const buildElasticQuery = function (params, options = {}) { * @return {object} An object that can be posted directly to ES */ const buildElasticBody = function (params, options = {}) { - const body = { - from: (params.per_page * (params.page - 1)), - size: params.per_page - } - - body.query = buildElasticQuery(params, options) - // Apply sort: let direction let field @@ -121,9 +114,13 @@ const buildElasticBody = function (params, options = {}) { field = SORT_FIELDS[params.sort].field || params.sort direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection } - body.sort = [{ [field]: direction }, { uri: 'asc' }] - return body + return { + from: (params.per_page * (params.page - 1)), + size: params.per_page, + query: buildElasticQuery(params, options), + sort: [{ [field]: direction }, { uri: 'asc' }] + } } const bodyForSearch = function (params) { @@ -134,9 +131,7 @@ const bodyForSearch = function (params) { excludes: EXCLUDE_FIELDS.concat(['items']) } - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) - - return body + return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } const buildElasticAggregationsBody = (params, aggregateProps) => { @@ -200,16 +195,13 @@ const bodyForAggregation = (params) => { aggregations[params.field].terms.size = params.per_page } - const body = Object.assign( + return Object.assign( buildElasticBody(params), { size: 0, aggregations: aggregations } ) - - - return body } module.exports = { From 36d56c55864a1baaaf2d24860f9a3afbe2fb0161 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 13 Jan 2026 10:56:19 -0500 Subject: [PATCH 36/82] Fix linting --- lib/elasticsearch/elastic-body-builder.js | 21 ++++++------- .../elastic-query-filter-builder.js | 30 ++++++++----------- 2 files changed, 22 
insertions(+), 29 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f285f0f2..e12eb9b1 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -1,6 +1,4 @@ const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') -const { deepValue } = require('../util') -const { esRangeValue } = require('../utils/resource-helpers') const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic-query-filter-builder') const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') @@ -31,10 +29,9 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { const excludes = returnAllItems ? EXCLUDE_FIELDS.filter((field) => field !== '*_sort') : EXCLUDE_FIELDS.concat(['items']) - - const aggregations = params.include_item_aggregations ? - { aggregations: ITEM_FILTER_AGGREGATIONS } : - {} + const aggregations = params.include_item_aggregations + ? { aggregations: ITEM_FILTER_AGGREGATIONS } + : {} const itemsOptions = { size: params.items_size, @@ -51,14 +48,14 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - const filter = returnAllItems ? {} : { filter: []} + const filter = returnAllItems ? {} : { filter: [] } const queryFilter = { filter: !returnAllItems ? 
[innerHits(itemsOptions)] : [] } // Establish base query: - let body = { + const body = { _source: { - excludes: excludes + excludes }, size: 1, query: { @@ -124,7 +121,7 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { - let body = buildElasticBody(params, { items: true }) + const body = buildElasticBody(params, { items: true }) // Strip unnecessary _source fields body._source = { @@ -149,7 +146,7 @@ const buildElasticAggregationsBody = (params, aggregateProps) => { return Object.assign( buildElasticBody(params), - { size: 0, aggregations: aggregations } + { size: 0, aggregations } ) } @@ -199,7 +196,7 @@ const bodyForAggregation = (params) => { buildElasticBody(params), { size: 0, - aggregations: aggregations + aggregations } ) } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index a7d3e0bb..5149cde8 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -174,7 +174,6 @@ const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBar } } - /** * Given an object containing query options, * returns content of the ES query context @@ -198,9 +197,6 @@ const innerHits = (_options = {}) => { merge_checkin_card_items: true }, _options) - // const placeToAddFilter = body.query.bool - - // If there is any item query at all, run an additional inner_hits query // to retrieve the total number of items without filtering: const itemsQuery = { @@ -210,19 +206,19 @@ const innerHits = (_options = {}) => { ) } - const allItemsQuery = itemsQuery.bool.filter ? - [{ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }] : - [] + const allItemsQuery = itemsQuery.bool.filter + ? 
[{ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }] + : [] const wrappedItemsQuery = { bool: { From d8249acf0ee9a514af7b63ccb18354365c64f813 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 13 Jan 2026 11:20:18 -0500 Subject: [PATCH 37/82] Remove adding source to body in body for search --- lib/elasticsearch/elastic-body-builder.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index e12eb9b1..924b72c8 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -121,12 +121,14 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { - const body = buildElasticBody(params, { items: true }) - - // Strip unnecessary _source fields - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } + const body = Object.assign( + buildElasticBody(params, { items: true }), + { + _source: { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + } + ) return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } From 9353887cce2f03393712f44470fecc488192dffc Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 15 Jan 2026 14:50:29 -0500 Subject: [PATCH 38/82] Add innerHits options --- lib/elasticsearch/elastic-body-builder.js | 3 ++- lib/elasticsearch/elastic-query-builder.js | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 924b72c8..f4b9f9af 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -121,8 +121,9 @@ const buildElasticBody = function (params, options = {}) { } const bodyForSearch = function (params) { + const itemsOptions = { 
merge_checkin_card_items: params.merge_checkin_card_items } const body = Object.assign( - buildElasticBody(params, { items: true }), + buildElasticBody(params, { items: itemsOptions }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 97fe7018..f65dbd74 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -2,6 +2,7 @@ const ElasticQuery = require('./elastic-query') const ApiRequest = require('../api-request') const { escapeQuery, namedQuery, prefixMatch, termMatch, phraseMatch } = require('./utils') const { regexEscape } = require('../util') +const { innerHits } = require('./elastic-query-filter-builder') const { FILTER_CONFIG, SEARCH_SCOPES } = require('./config') @@ -41,6 +42,10 @@ class ElasticQueryBuilder { this.buildAllQuery() } + if (options.items) { + this.query.addFilter(innerHits(options.items)) + } + // Add user filters: this.applyFilters() From 15a5e5ca29e40747876b751ff14f54e2d757252c Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 15 Jan 2026 16:19:09 -0500 Subject: [PATCH 39/82] Remove dependence on addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 8 +++++--- lib/elasticsearch/elastic-query-builder.js | 8 ++++---- lib/elasticsearch/elastic-query-filter-builder.js | 1 + lib/elasticsearch/elastic-query.js | 1 - 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index f4b9f9af..28bd58c7 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -122,8 +122,9 @@ const buildElasticBody = function (params, options = {}) { const bodyForSearch = function (params) { const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } - const body = Object.assign( - buildElasticBody(params, { items: itemsOptions }), + + 
const otherBody = Object.assign( + buildElasticBody(params, { items: itemsOptions, test: true }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -131,7 +132,8 @@ const bodyForSearch = function (params) { } ) - return addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + // return withInnerHits + return otherBody } const buildElasticAggregationsBody = (params, aggregateProps) => { diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index f65dbd74..03602596 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -42,13 +42,13 @@ class ElasticQueryBuilder { this.buildAllQuery() } - if (options.items) { - this.query.addFilter(innerHits(options.items)) - } - // Add user filters: this.applyFilters() + if (options.items && options.test) { + this.query.addFilter(innerHits(options.items)) + } + // Apply global clauses: // Hide specific nypl-sources when configured to do so: this.applyHiddenNyplSources() diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index 5149cde8..4eba4ae6 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -191,6 +191,7 @@ const itemsQueryContext = (options) => { } const innerHits = (_options = {}) => { + console.log('innerHits for: ', _options) const options = Object.assign({ size: process.env.SEARCH_ITEMS_SIZE || 200, from: 0, diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index 82edb0f6..68dfd30f 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -43,7 +43,6 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - console.log('options: ', this.options) if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { return { match_all: {} From 
b5530ebe6cb91b1c73b5814ccd6cff393f2b613e Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 14:20:10 -0500 Subject: [PATCH 40/82] Add tests for bodyForSearch; remove tests for addInnerHits --- lib/elasticsearch/elastic-body-builder.js | 32 +--- lib/elasticsearch/elastic-query-builder.js | 2 +- .../elastic-query-filter-builder.js | 1 - test/elastic-body-builder.test.js | 145 ++++++++++++++++++ test/resources.test.js | 132 ---------------- 5 files changed, 154 insertions(+), 158 deletions(-) create mode 100644 test/elastic-body-builder.test.js diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 28bd58c7..9a5f682a 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -3,24 +3,6 @@ const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic- const ApiRequest = require('../api-request') const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') -/** - * Given a ES search body, returns same object modified to include the - * additional query necessary to limit (and paginate through) items - * - * @param {object} body - An ES query object (suitable for POSTing to ES - * @param {object} options - An object optionally defining `size` and `from` - * for limiting and paginating through items - */ -const addInnerHits = (body, _options = {}) => { - const wrappedItemsQuery = innerHits(_options) - - const placeToAddFilter = body.query.bool - - placeToAddFilter.filter.push(wrappedItemsQuery) - - return body -} - const bodyForFindByUri = async function (recapBarcodesByStatus, params) { const paramsIncludesItemLevelFiltering = Object.keys(params) .filter((param) => param.startsWith('item_')).length > 0 @@ -112,9 +94,12 @@ const buildElasticBody = function (params, options = {}) { direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection } + const from = params.per_page && params.page ? 
{ from: params.per_page * (params.page - 1) } : {} + const size = params.per_page ? { size: params.per_page } : {} + return { - from: (params.per_page * (params.page - 1)), - size: params.per_page, + ...from, + ...size, query: buildElasticQuery(params, options), sort: [{ [field]: direction }, { uri: 'asc' }] } @@ -123,8 +108,8 @@ const buildElasticBody = function (params, options = {}) { const bodyForSearch = function (params) { const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } - const otherBody = Object.assign( - buildElasticBody(params, { items: itemsOptions, test: true }), + const body = Object.assign( + buildElasticBody(params, { items: itemsOptions }), { _source: { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -133,7 +118,7 @@ const bodyForSearch = function (params) { ) // return withInnerHits - return otherBody + return body } const buildElasticAggregationsBody = (params, aggregateProps) => { @@ -208,7 +193,6 @@ const bodyForAggregation = (params) => { module.exports = { bodyForFindByUri, - addInnerHits, itemsFilterContext, itemsQueryContext, buildElasticQuery, diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 03602596..de87a263 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -45,7 +45,7 @@ class ElasticQueryBuilder { // Add user filters: this.applyFilters() - if (options.items && options.test) { + if (options.items) { this.query.addFilter(innerHits(options.items)) } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index 4eba4ae6..5149cde8 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -191,7 +191,6 @@ const itemsQueryContext = (options) => { } const innerHits = (_options = {}) => { - console.log('innerHits for: ', _options) const options = Object.assign({ size: 
process.env.SEARCH_ITEMS_SIZE || 200, from: 0, diff --git a/test/elastic-body-builder.test.js b/test/elastic-body-builder.test.js new file mode 100644 index 00000000..97986858 --- /dev/null +++ b/test/elastic-body-builder.test.js @@ -0,0 +1,145 @@ +const { expect } = require('chai') + +const { bodyForSearch, bodyForFindByUri } = require('../lib/elasticsearch/elastic-body-builder') + +describe('bodyForSearch', function () { + it('excludes checkin cards by default', function () { + expect(bodyForSearch({ sort: 'relevance' })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must_not: [ + { + term: { + 'items.type': 'nypl:CheckinCardItem' + } + } + ] + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: [ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) + + it('includes checkin cards when present in params', function () { + expect(bodyForSearch({ sort: 'relevance', merge_checkin_card_items: true })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: [ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) +}) + +describe('bodyForFindByUri', async function () { + 
bodyForFindByUri() +}) diff --git a/test/resources.test.js b/test/resources.test.js index c91b5d37..98220b26 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -723,138 +723,6 @@ describe('Resources query', function () { }) }) - describe('addInnerHits', () => { - it('should include query for items', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2 })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - } - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should exclude check in card items if explicitly set', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: { filter: [] } } }, { size: 1, from: 2, merge_checkin_card_items: false })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must_not: [ - { term: { 'items.type': 'nypl:CheckinCardItem' } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should include filters for items', () => { - expect(resourcesPrivMethods.addInnerHits( - { query: { bool: { filter: [] } } }, - { size: 1, from: 2, query: { volume: [1, 2], location: ['SASB', 'LPA'], other: 'filter' } } - )).to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - }, - filter: [ - { range: { 'items.volumeRange': { gte: 1, lte: 2 } } }, - { terms: { 'items.holdingLocation.id': ['SASB', 'LPA'] } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, 
- from: 2, - name: 'items' - } - } - }, - { match_all: {} }, - { - nested: { - inner_hits: { name: 'allItems' }, - path: 'items', - query: { - bool: { - must_not: [ - { exists: { field: 'items.electronicLocator' } } - ] - } - } - } - } - ] - } - } - ] - } - } - }) - }) - }) - describe('search exception handling', () => { describe('lexical error', () => { before(() => { From 6cfbd49e9ac36c87aae2d440cef660dfa2e32399 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 14:20:50 -0500 Subject: [PATCH 41/82] Remove remaining references to addInnerHits --- lib/resources.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 062a0a28..87a41b45 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -29,7 +29,6 @@ const { const { bodyForFindByUri, - addInnerHits, itemsFilterContext, itemsQueryContext, buildElasticQuery, @@ -249,7 +248,6 @@ module.exports = function (app, _private = null) { _private.esRangeValue = esRangeValue _private.itemsFilterContext = itemsFilterContext _private.itemsQueryContext = itemsQueryContext - _private.addInnerHits = addInnerHits _private.aggregationQueriesForParams = aggregationQueriesForParams _private.mergeAggregationsResponses = mergeAggregationsResponses } From 1c769c9cddef35a85f02e8ac48f3658677eac8d5 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 16 Jan 2026 15:36:19 -0500 Subject: [PATCH 42/82] Add bodybuilder tests --- lib/elasticsearch/elastic-body-builder.js | 5 +- test/elastic-body-builder.test.js | 235 +++++++++++++++++++++- 2 files changed, 235 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 9a5f682a..110917b1 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -3,7 +3,7 @@ const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic- const ApiRequest = require('../api-request') const ElasticQueryBuilder = 
require('../elasticsearch/elastic-query-builder') -const bodyForFindByUri = async function (recapBarcodesByStatus, params) { +const bodyForFindByUri = function (recapBarcodesByStatus, params) { const paramsIncludesItemLevelFiltering = Object.keys(params) .filter((param) => param.startsWith('item_')).length > 0 @@ -30,7 +30,7 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - const filter = returnAllItems ? {} : { filter: [] } + // const filter = returnAllItems ? {} : { filter: [] } const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } @@ -52,7 +52,6 @@ const bodyForFindByUri = async function (recapBarcodesByStatus, params) { ...queryFilter } }, - ...filter, ...aggregations } diff --git a/test/elastic-body-builder.test.js b/test/elastic-body-builder.test.js index 97986858..ccec82dd 100644 --- a/test/elastic-body-builder.test.js +++ b/test/elastic-body-builder.test.js @@ -140,6 +140,237 @@ describe('bodyForSearch', function () { }) }) -describe('bodyForFindByUri', async function () { - bodyForFindByUri() +describe('bodyForFindByUri', function () { + it('queries for uri', function () { + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 100, + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + 
item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + const params = { + all_items: false, + uri: 'b15781267', + items_size: 100, + items_from: 0, + merge_checkin_card_items: true, + include_item_aggregations: true + } + const barcodes = {} + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) + + it('accepts item params', function () { + const barcodes = { 'Not Available': ['1234'] } + const params = { + all_items: false, + uri: 'b15781267', + items_size: 10, + items_from: 10, + merge_checkin_card_items: true, + include_item_aggregations: true + } + + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 10, + from: 10, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) }) From 
a5e190e1bb8be71376d3f2feb0452cf997ab02f3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 23 Jan 2026 11:56:29 -0500 Subject: [PATCH 43/82] Remove options from elastic-query --- lib/elasticsearch/elastic-query-builder.js | 2 +- lib/elasticsearch/elastic-query.js | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index de87a263..cc87333b 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -14,7 +14,7 @@ const POPULARITY_BOOSTS = [ class ElasticQueryBuilder { constructor (apiRequest, options = {}) { this.request = apiRequest - this.query = new ElasticQuery(options) + this.query = new ElasticQuery() // Break on search_scope: switch (this.request.params.search_scope) { diff --git a/lib/elasticsearch/elastic-query.js b/lib/elasticsearch/elastic-query.js index 68dfd30f..aa2d2f1e 100644 --- a/lib/elasticsearch/elastic-query.js +++ b/lib/elasticsearch/elastic-query.js @@ -7,11 +7,10 @@ **/ class ElasticQuery { - constructor (options = {}) { + constructor () { this.musts = [] this.shoulds = [] this.filters = [] - this.options = options } addMust (clause) { @@ -43,7 +42,7 @@ class ElasticQuery { * "query" param in a ES call */ toJson () { - if (!this.musts.length && !this.shoulds.length && !this.filters.length && !this.options.items) { + if (!this.musts.length && !this.shoulds.length && !this.filters.length) { return { match_all: {} } @@ -57,7 +56,7 @@ class ElasticQuery { if (this.shoulds.length) { result.bool.should = this.shoulds } - if (this.filters.length || this.options.items) { + if (this.filters.length) { result.bool.filter = this.filters } From 8b3e4e17730eb87c123171e84308e819c2c6313e Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 13:34:07 -0500 Subject: [PATCH 44/82] Add initial bnf --- lib/elasticsearch/cql/index-mapping.js | 79 ++++++++++++++++++++++++++ 
lib/elasticsearch/cql_grammar.js | 40 +++++++++---- 2 files changed, 109 insertions(+), 10 deletions(-) create mode 100644 lib/elasticsearch/cql/index-mapping.js diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js new file mode 100644 index 00000000..40e96af1 --- /dev/null +++ b/lib/elasticsearch/cql/index-mapping.js @@ -0,0 +1,79 @@ +const INDEX_MAPPING = { + keyword: { + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded', + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) }, + // Try to detect shelfmark searches (e.g. 
JFD 16-5143) + { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ] + }, + title: { + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ] + }, + author: { + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] + }, + callnumber: {}, + identifier: {}, + subject: { + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] + }, + language: { field: ['language.id', 'language.label'] }, + date: {}, + series: { + fields: ['series', 'parallelSeries'] + }, + genre: { field: ['genreForm.raw'] }, + center: { field: ['buildingLocationIds'] }, + division: { field: ['collectionIds'] }, + format: { field: ['formatId'] } +} + +module.exports = { + INDEX_MAPPING +} diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 3cf0b63c..3f0c6fa2 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,17 +1,37 @@ const { Grammars } = require('ebnf') -const cql = ` +let cql = ` query ::= sub_query " " connective " " query | sub_query - connective ::= "and" | "or" - sub_query ::= atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " key | key - scope ::= "title" | "contributor" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" - relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= non_ws_key | '"' keyphrase '"' - keyphrase ::= [^"]+ - non_ws_key ::= [^#x20#x09#x0A#x0D"()]+ + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " 
atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' term '"' + term ::= escaped_char term | regular_char term | escaped_char | regular_char + regular_char ::= [^"\\\\] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + +let alt_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' TERM '"' + TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR + REGULAR_CHAR ::= [^"\\\\] + ESCAPED_CHAR ::= SLASH CHAR + SLASH ::= "\\\\" + CHAR ::= [a-z]|[^a-z] ` const cqlParser = new Grammars.W3C.Parser(cql) +const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) -module.exports = { cqlParser } +module.exports = { cqlParser, alt_cqlParser } From 87c1e32a4ba3dbb564fec79c8524e256d736d1e6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 13:34:26 -0500 Subject: [PATCH 45/82] Update packages --- package-lock.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package-lock.json b/package-lock.json index 5e52382b..d253d9a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3713,6 +3713,7 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": 
"sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, "optional": true, "engines": { "node": ">=14" From a55444dfd220bada1a02e7826514fc9b8316a1fa Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 16:24:57 -0500 Subject: [PATCH 46/82] Add alternate grammars and comment for atomic --- lib/elasticsearch/cql/index-mapping.js | 9 +++++-- lib/elasticsearch/cql_grammar.js | 36 ++++++++++++++++++++++++++ lib/elasticsearch/cql_query_builder.js | 15 +++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 40e96af1..39eb0a22 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -58,8 +58,13 @@ const INDEX_MAPPING = { author: { fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] }, - callnumber: {}, - identifier: {}, + callnumber: { + fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + }, + identifier: { + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased' ], + term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] + }, subject: { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 3f0c6fa2..986399ca 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -31,6 +31,42 @@ let alt_cql = ` CHAR ::= [a-z]|[^a-z] ` +let word_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| 
"genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' phrase '"' + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^"\\\\#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + +let convenient_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term | quoted_term | word + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' phrase '"' + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^"\\\\#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + const cqlParser = new Grammars.W3C.Parser(cql) const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index f1f2d8c4..f11440ad 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -66,6 +66,21 @@ function buildAtomic (scope, relation, term) { return builder.query.toJson() } +/** + build atomic: + - identify the scope fields that match the term + - separate out into main, items, holdings + - boolean(main, items, holdings) + - items/holds = nested(items/holdings, main) + - main: + - if operator is any/all, take all query terms not starting with ^ and put in 
multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ + module.exports = { buildEsQuery, buildEsQueryFromTree, From 05dfb3f11850e44297c26868f4332e7a4eb368b4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 5 Feb 2026 15:52:11 -0500 Subject: [PATCH 47/82] Add reverseGrammar and related methods --- lib/elasticsearch/cql/index-mapping.js | 6 +- lib/elasticsearch/cql_grammar.js | 366 +++++++++++++++++++++---- lib/elasticsearch/cql_query_builder.js | 240 +++++++++++++--- test/cql_grammar.test.js | 112 ++++++++ 4 files changed, 626 insertions(+), 98 deletions(-) create mode 100644 test/cql_grammar.test.js diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 39eb0a22..f0f19a9a 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -1,4 +1,4 @@ -const INDEX_MAPPING = { +const indexMapping = { keyword: { fields: [ 'title', @@ -62,7 +62,7 @@ const INDEX_MAPPING = { fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] }, identifier: { - prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased' ], + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] }, subject: { @@ -80,5 +80,5 @@ const INDEX_MAPPING = { } module.exports = { - INDEX_MAPPING + indexMapping } diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 986399ca..aa3450d3 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,73 
+1,323 @@ const { Grammars } = require('ebnf') -let cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' term '"' - term ::= escaped_char term | regular_char term | escaped_char | regular_char - regular_char ::= [^"\\\\] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +// let cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' term '"' +// term ::= escaped_char term | regular_char term | escaped_char | regular_char +// regular_char ::= [^"\\\\] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` +// +// let alt_cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term 
::= '"' TERM '"' +// TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR +// REGULAR_CHAR ::= [^"\\\\] +// ESCAPED_CHAR ::= SLASH CHAR +// SLASH ::= "\\\\" +// CHAR ::= [a-z]|[^a-z] +// ` + +// let word_cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope optional_whitespace relation optional_whitespace quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// optional_whitespace ::= whitespace | "" +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] + +// NEED to add some allowed whitespace before and after atomic queries +const ridic = ` + query ::= sub_query whitespace connective whitespace query | sub_query + connective ::= "TON DNA" | "DNA" | "RO" | "NOT" + sub_query ::= atomic_query | ")" query "(" + atomic_query ::= [a-z]+ + whitespace ::= [#x20#x09#x0A#x0D]+ ` -let alt_cql = ` - query ::= sub_query " " connective " " query | sub_query +// const cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND NOT" | "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope relation quoted_term +// scope ::= scope_term whitespace | scope_term +// relation ::= relation_term whitespace | relation_term +// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| 
"genre" | "center" | "division" | "format" +// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` + +// function reverseGrammar (grammar) { +// return grammar.split("\n").map(line => +// (line.split("::=").map(side => +// (side.split("|").map(disjunct => +// (disjunct.split(" ").map(word => +// (word => word.includes("\"") ? reverseString(word) : word) +// ).reverse().join(" ")) +// )).join("|") +// )).join("::=") +// ).join("\n") +// } + +function reverseGrammar (grammar) { + return grammar.split("\n") + .map(line => + (line.split("::=") + .map(side => + (side.split("|") + .map(dis => + (dis.split(" ") + .map(word => + (word.includes("\"") ? 
word.split("").reverse().join("") : word)) + .reverse().join(" ")) + ).join("|"))).join("::= "))).join("\n") +} + + + +const leftTest = ` + query ::= query connective sub_query | sub_query connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' TERM '"' - TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR - REGULAR_CHAR ::= [^"\\\\] - ESCAPED_CHAR ::= SLASH CHAR - SLASH ::= "\\\\" - CHAR ::= [a-z]|[^a-z] + sub_query ::= [a-z]+ ` -let word_cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' phrase '"' - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^"\\\\#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +const leftCql = ` + query ::= query whitespace connective whitespace sub_query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | 
"author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= phrase whitespace word | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= word escaped_char | word regular_char | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] +` +const cql = ` + query ::= sub_query whitespace connective whitespace query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] ` -let convenient_cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term | quoted_term | word - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | 
">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' phrase '"' - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^"\\\\#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +// const cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND NOT" | "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope relation quoted_term +// scope ::= scope_term whitespace | scope_term +// relation ::= relation_term whitespace | relation_term +// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= quote phrase quote +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= [#x5c] +// char ::= [a-z]|[^a-z] +// quote ::= [#x22] +// +// ` -` +// const escaping = ` +// quoted_term ::= quote phrase quote +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= [#x5c] +// char ::= [a-z]|[^a-z] +// quote ::= [#x22] +// ` + +function simplifyRidic (ast) { + if (ast.type === 'atomic_query' || ast.type === 'connective') return ast.text + return ast.children.map(child => simplifyRidic(child)) +} + +function simplify (ast) { + switch (ast.type) { + case 'query': { + console.log('query') + const 
children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) + return children.length > 1 ? children : children[0] + } + case 'connective': + return ast.text + case 'sub_query': + return simplify(ast.children.find(child => child.type.includes('query'))) + case 'atomic_query': + return ast.children.map(child => simplify(child)) + case 'scope': + return simplify(ast.children.find(child => child.type.includes('scope_term'))) + case 'relation': + return simplify(ast.children.find(child => child.type.includes('relation_term'))) + case 'scope_term': + return ast.text + case 'relation_term': + return ast.text + case 'quoted_term': + return simplify(ast.children.find(child => child.type.includes('phrase'))) + case 'phrase': { + const word = ast.children.find(child => child.type === 'word') + const phrase = ast.children.find(child => child.type === 'phrase') + return [simplify(word)].concat(phrase ? simplify(phrase) : []) + } + case 'word': + return ast.text + default: + break + } +} + +function partialSimplify (tree) { + if (['phrase', 'relation_term', 'scope_term', 'connective'].includes(tree.type)) { + return tree.text + } + if (tree.type === 'sub_query') { + return [partialSimplify(tree.children.find(child => child.type.includes('query')))] + } + const simplifiedChildren = tree.children.map(child => partialSimplify(child)) + return simplifiedChildren.length === 1 ? 
simplifiedChildren[0]: simplifiedChildren + +} + +function rectifyTree (tree) { + // collect all the boolean queries that directly contain boolean queries + const toRotate = [] + const nodeQueue = [tree] + while (nodeQueue.length) { + let node = nodeQueue.shift() + if (node.type === 'query' && node.children.find(child => child.type === 'connective')) { + let rightChild = node.children.find(child => child.type === 'query') + if (rightChild && rightChild.children.find(child => child.type === 'connective')) { + toRotate.push(node) + } + } + node.children.forEach(child => {nodeQueue.push(child)}) + } + console.log('toRotate: ', toRotate) + toRotate.forEach(node => { + console.log('rotating: ', node) + console.dir(tree, {depth: null}) + const lastChild = node.children[node.children.length - 1] + const grandChild = lastChild.children[0] + node.children[node.children.length - 1] = grandChild + lastChild[0] = node + }) +} + +function rectifySkeleton (tree) { + const connectives = ["AND", "OR"] + const toRotate = [] + const nodeQueue = [tree] + while (nodeQueue.length) { + let node = nodeQueue.shift() + if (node.find(child => connectives.includes(child))) { + let rightChild = node[node.length - 1] + if (rightChild.find(child => connectives.includes(child))) { + toRotate.push(node) + } + } + node.forEach(child => {nodeQueue.push(child)}) + } + console.log('toRotate: ', toRotate) + toRotate.forEach(node => { + console.log('rotating: ', node) + console.dir(tree, {depth: null}) + const lastChild = node.pop() + node.push(lastChild.shift()) + lastChild.unshift(node) + }) +} + +function reverseString (string) { + return string.split("").reverse().join("") +} + +function reverseAST (tree) { + tree.text = reverseString(tree.text) + tree.children = tree.children.map(child => reverseAST(child)).reverse() + return tree +} + +// let convenient_cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " 
atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term | quoted_term | word +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` +// const cqlParser = new Grammars.W3C.Parser(cql) +// const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) const cqlParser = new Grammars.W3C.Parser(cql) -const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) +const ridicParser = new Grammars.W3C.Parser(ridic) +// const escapingParser = new Grammars.W3C.Parser(escaping) -module.exports = { cqlParser, alt_cqlParser } +module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars }//, escapingParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index f11440ad..44fc4e2e 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,10 +1,16 @@ -const { cqlParser } = require('./cql_grammar') +const { cqlParser, rectifyTree } = require('./cql_grammar') const ElasticQueryBuilder = require('./elastic-query-builder') const ApiRequest = require('../api-request') +const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - const tree = cqlParser.getAST(cqlQuery) - return buildEsQueryFromTree(tree) + // const tree = cqlParser.getAST(cqlQuery) + // return buildEsQueryFromTree(tree) + return buildEsQueryFromTree( + 
rectifyTree( + cqlParser.getAST(cqlQuery) + ) + ) } /** @@ -14,56 +20,79 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { case 'query': - if (tree.children.length > 1) { - return buildBoolean( - buildEsQueryFromTree(tree.children[0]), - tree.children[1].text, - buildEsQueryFromTree(tree.children[2]) - ) - } else { - return buildEsQueryFromTree(tree.children[0]) + queries = tree.children.filter(child.type.contains('query')) + connectives = tree.children.filter(child => child.type === 'connective') + if (connectives.length) { + return buildBoolean(connectives[0], queries) } + return buildEsQueryFromTree(queries[0]) case 'sub_query': - return buildEsQueryFromTree(tree.children.length > 1 ? tree.children[1] : tree.children[0]) + const query = tree.children.filter(child => child.type.contains('query'))[0] + return buildEsQueryFromTree(query) case 'atomic_query': { - let scope - let relation - let term - if (tree.children.length > 1) { - scope = tree.children[0].text - relation = tree.children[1].text - } else { - scope = 'all' - relation = 'any' - } - term = tree.children.find(child => child.type === 'key').children[0].text - - return buildAtomic(scope, relation, term) + const { scope, relation, term, terms } = atomicQueryParams(query) + return buildAtomic(scope, relation, term, terms) } default: break } } -function buildBoolean (queryOne, operator, queryTwo) { +function buildBoolean (operator, queries) { + if (operator === "NOT") return buildNegation(queries) const esOperator = operator === 'and' ? 
'must' : 'should' return { bool: { - [esOperator]: [ - queryOne, - queryTwo - ] + [esOperator]: queries.map(query => buildEsQueryFromTree(query)) + } + } +} + +function buildNegation (queries) { + return { + bool: { + must: [buildEsQueryFromTree(queries[0])], + must_not: [buildEsQueryFromTree(queries[1])] } } } -function buildAtomic (scope, relation, term) { - const request = ApiRequest.fromParams({ - q: term, - search_scope: scope - }) - const builder = ElasticQueryBuilder.forApiRequest(request) - return builder.query.toJson() +function atomicQueryParams (query) { + return { + scope: query.find(child => child.type === 'scope'), + relation: query.find(child => child.type === 'relation'), + term: findTopPhrase(query), + terms: findTopWords(query) + } +} + +function findTopPhrase (tree) { + if (tree.type === 'phrase') return tree.text + const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) + return topPhrases.length ? topPhrases[0] : null +} + +function findTopWords (tree) { + if (tree.type === 'word') return [tree.text] + return tree.children.map(child => findTopWords(child)).flatten() +} + +// function buildAtomic (scope, relation, term) { +// const request = ApiRequest.fromParams({ +// q: term, +// search_scope: scope +// }) +// const builder = ElasticQueryBuilder.forApiRequest(request) +// return builder.query.toJson() +// } + +function nestedMapAndFilter (obj, filter, map) { + return Object.assign({}, + ...Object.entries( + obj + ).filter(filter) // need to modify this to get full query + .map(map) + ) } /** @@ -81,6 +110,143 @@ function buildAtomic (scope, relation, term) { - put all terms in term matches with term fields */ + function buildAtomic (scope, relation, terms, term) { + const allFields = nestedMapAndFilter( + indexMapping[scope], + (k,v) => typeof v === 'string' || v.on(terms), + ([k,v]) => ({[k] : typeof v === 'string' ? 
v : v.field}) + ) + + const bibFields = nestedMapAndFilter( + allFields, + ([k, v]) => !['items, holdings'].any(prefix => k.startsWith(prefix)), + ([k, v]) => ({[k]: v}) + ) + + const itemFields = nestedMapAndFilter( + allFields, + ([k, v]) => k.startsWith('items'), + ([k, v]) => ({[k]: v}) + ) + + const holdingsFields = nestedMapAndFilter( + allFields, + ([k, v]) => k.startsWith('holdings'), + ([k, v]) => ({[k]: v}) + ) + + return { + bool: { // should this start with query? + should: [ + buildAtomicMain(bibFields, relation, terms, term), + buildAtomicNested('items', itemFields, relation, terms, term), + buildAtomicNested('holdings', holdingsFields, relation, terms, term) + ] + } + } + } + + function buildAtomicNested(name, fields, relation, terms, term) { + return { + nested: { + path: name, + query: buildAtomicMain(fields, relation, terms, term) + } + } + } + + /** + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ + function buildAtomicMain (fields, relation, terms, term) { + return { + bool: { + should: [ + ...anyAllQueries(fields, relation, terms, term), + ...adjEqQueries(fields, relation, terms, term), + ...termQueriesForTermFields(fields, relation, terms, term), + ...prefixQueriesForPrefixFields(fields, relation, terms, term) + ] + } + } + } + + function anyAllQueries (fields, relation, terms, term) { + if (!['any', 'all'].contains(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + multiMatch(fieldsToUse, relation, 
terms.filter(term => !term.startsWith('^'))), + ...(terms.filter(term => term.startsWith('^')).map(term => prefixQuery(fieldsToUse, term.slice(1)))) + ] + } + + function adjEqQueries (fields, relation, terms, term) { + if (!['=', 'adj'].contains(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + term.startsWith('^') ? + phrasePrefixQuery(fieldsToUse, term.slice(1)) : + phraseQuery(fieldsToUse, term) + ] + } + + function prefixQueriesForPrefixFields (fields, relation, terms, term) { + if (!fields.prefix) return [] + return fields.prefix.map(field => prefixQuery(field, term)) + } + + function termQueriesForTermFields (fields, relation, terms, term) { + if (!fields.term) return [] + return fields.term.map(field => termQuery(field, term)) + } + + function termQuery (field, term) { + return { "term" : { [field] : term } } + } + + function prefixQuery (field, term) { + return { "prefix" : { [field] : term } } + } + + function multiMatch (fields, relation, terms) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "cross_fields", + "operator": relation === "any" ? 
"or" : "and" + } + } + } + + function phrasePrefixQuery (fields, term) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "phrase_prefix" + } + } + } + + function phraseQuery (fields, term) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "phrase" + } + } + } + + + module.exports = { buildEsQuery, buildEsQueryFromTree, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js new file mode 100644 index 00000000..0a5439b9 --- /dev/null +++ b/test/cql_grammar.test.js @@ -0,0 +1,112 @@ +const { expect } = require('chai') + +const { cqlParser, simplify, rectifyTree } = require('../lib/elasticsearch/cql_grammar') + + +function validateAtomicQuery(parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal("query") + expect(parsed.children.length).to.equal(1) + const subQuery = parsed.children[0] + expect(subQuery.type).to.equal("sub_query") + expect(subQuery.children.length).to.equal(1) + const atomicQuery = subQuery.children[0] + expect(atomicQuery.type).to.equal("atomic_query") + const scopeNode = atomicQuery.children.find(child => child.type === "scope") + const scopeTerm = scopeNode.children.find(child => child.type === "scope_term") + expect(scopeTerm.text).to.equal(scope) + const relationNode = atomicQuery.children.find(child => child.type === "relation") + const relationTerm = relationNode.children.find(child => child.type === "relation_term") + expect(relationTerm.text).to.equal(relation) + const quotedTermNode = atomicQuery.children.find(child => child.type === "quoted_term") + expect(quotedTermNode.text).to.equal(quotedTerm) +} + +function validateBooleanQuery(parsed, expected) { + +} + +describe.only('CQL Grammar', function () { + describe('parsing queries', function () { + it('parses atomic queries', function () { + validateAtomicQuery(cqlParser.getAST("title=\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", 
"adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") + validateAtomicQuery(cqlParser.getAST("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + }) + + it('allows whitespace variants', function () { + validateAtomicQuery(cqlParser.getAST("title =\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title= \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + }) + + it('correctly escapes escape characters', function () { + validateAtomicQuery(cqlParser.getAST("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") + validateAtomicQuery(cqlParser.getAST("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + }) + + it('identifies words correctly', function () { + const parsed = cqlParser.getAST("keyword adj \"A multiword keyword\"") + const words = [] + let nodes = [parsed] + while (nodes.length) { + let node = nodes.shift() + if (node.type === "word") { + words.push(node.text) + } else { + nodes = nodes.concat(node.children) + } + } + const expectedWords = ["A", "multiword", "keyword"] + words.forEach(word => { + expect(expectedWords).to.include(word) 
+ }) + expect(words.length).to.equal(3) + }) + + it('parses boolean queries', function () { + expect(simplify(cqlParser.getAST( + "title=\"dogs\" AND keyword=\"cats\"" + ))).to.deep.equal( + [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] + ) + + expect(simplify(cqlParser.getAST( + "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" + ))).to.deep.equal( + [ + [ + "title", "=", ["dogs"] + ], + "AND", + [ + [ + "keyword", "=", ["cats"] + ], + "OR", + [ + "author", "adj", ["Bird"] + ] + ] + ] + ) + }) + + it('parses queries with parentheses', function () { + expect() + .to.deep.equal( + [ + [ [ 'title', '=', ['dogs'] ], 'AND', [ 'keyword', '=', ['cats'] ] ], + 'OR', + [ 'author', 'adj', [ 'Bird' ] ] + ] + ) + }) + }) +}) From f373383f480421667efa42ab6291703b06e674f3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 5 Feb 2026 15:59:06 -0500 Subject: [PATCH 48/82] Apparently working left associating cql --- lib/elasticsearch/cql_grammar.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index aa3450d3..250b1287 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -132,6 +132,8 @@ const leftCql = ` quote ::= [#x22] ` +const rightCql = reverseGrammar(leftCql) + const cql = ` query ::= sub_query whitespace connective whitespace query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" @@ -318,6 +320,15 @@ function reverseAST (tree) { // const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) const cqlParser = new Grammars.W3C.Parser(cql) const ridicParser = new Grammars.W3C.Parser(ridic) +const rightCqlParser = new Grammars.W3C.Parser(rightCql) + +function parseRight (string, parser) { + return reverseAST(parser.getAST(reverseString(string))) +} + +function parseWithRightCql (string) { + return parseRight(string, rightCqlParser) +} // const escapingParser = new Grammars.W3C.Parser(escaping) 
-module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars }//, escapingParser } +module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars, parseRight, parseWithRightCql }//, escapingParser } From 951a8e89f65065589923ab8d49fffa12bc3566ff Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 11:54:02 -0500 Subject: [PATCH 49/82] Clean up grammar file --- lib/elasticsearch/cql_grammar.js | 242 +------------------------------ 1 file changed, 1 insertion(+), 241 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 250b1287..c9f73d6c 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,95 +1,5 @@ const { Grammars } = require('ebnf') -// let cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' term '"' -// term ::= escaped_char term | regular_char term | escaped_char | regular_char -// regular_char ::= [^"\\\\] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` -// -// let alt_cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" 
| "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' TERM '"' -// TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR -// REGULAR_CHAR ::= [^"\\\\] -// ESCAPED_CHAR ::= SLASH CHAR -// SLASH ::= "\\\\" -// CHAR ::= [a-z]|[^a-z] -// ` - -// let word_cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope optional_whitespace relation optional_whitespace quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// optional_whitespace ::= whitespace | "" -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] - -// NEED to add some allowed whitespace before and after atomic queries - -const ridic = ` - query ::= sub_query whitespace connective whitespace query | sub_query - connective ::= "TON DNA" | "DNA" | "RO" | "NOT" - sub_query ::= atomic_query | ")" query "(" - atomic_query ::= [a-z]+ - whitespace ::= [#x20#x09#x0A#x0D]+ -` - -// const cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND NOT" | "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope relation quoted_term -// scope ::= scope_term whitespace | scope_term -// relation ::= relation_term whitespace | relation_term -// scope_term ::= 
"title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` - -// function reverseGrammar (grammar) { -// return grammar.split("\n").map(line => -// (line.split("::=").map(side => -// (side.split("|").map(disjunct => -// (disjunct.split(" ").map(word => -// (word => word.includes("\"") ? reverseString(word) : word) -// ).reverse().join(" ")) -// )).join("|") -// )).join("::=") -// ).join("\n") -// } - function reverseGrammar (grammar) { return grammar.split("\n") .map(line => @@ -104,14 +14,6 @@ function reverseGrammar (grammar) { ).join("|"))).join("::= "))).join("\n") } - - -const leftTest = ` - query ::= query connective sub_query | sub_query - connective ::= "AND" | "OR" - sub_query ::= [a-z]+ -` - const leftCql = ` query ::= query whitespace connective whitespace sub_query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" @@ -134,64 +36,6 @@ const leftCql = ` const rightCql = reverseGrammar(leftCql) -const cql = ` - query ::= sub_query whitespace connective whitespace query | sub_query - connective ::= "AND NOT" | "AND" | "OR" | "NOT" - sub_query ::= atomic_query | "(" query ")" - atomic_query ::= scope relation quoted_term - scope ::= scope_term whitespace | scope_term - relation ::= relation_term whitespace | relation_term - scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation_term ::= "any" | "adj" | 
"all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= quote phrase quote - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= [#x5c] - char ::= [a-z]|[^a-z] - quote ::= [#x22] -` - -// const cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND NOT" | "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope relation quoted_term -// scope ::= scope_term whitespace | scope_term -// relation ::= relation_term whitespace | relation_term -// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= quote phrase quote -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= [#x5c] -// char ::= [a-z]|[^a-z] -// quote ::= [#x22] -// -// ` - -// const escaping = ` -// quoted_term ::= quote phrase quote -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= [#x5c] -// char ::= [a-z]|[^a-z] -// quote ::= [#x22] -// ` - -function simplifyRidic (ast) { - if (ast.type === 'atomic_query' || ast.type === 'connective') return ast.text - return ast.children.map(child => simplifyRidic(child)) -} - function simplify (ast) { switch (ast.type) { 
case 'query': { @@ -227,67 +71,6 @@ function simplify (ast) { } } -function partialSimplify (tree) { - if (['phrase', 'relation_term', 'scope_term', 'connective'].includes(tree.type)) { - return tree.text - } - if (tree.type === 'sub_query') { - return [partialSimplify(tree.children.find(child => child.type.includes('query')))] - } - const simplifiedChildren = tree.children.map(child => partialSimplify(child)) - return simplifiedChildren.length === 1 ? simplifiedChildren[0]: simplifiedChildren - -} - -function rectifyTree (tree) { - // collect all the boolean queries that directly contain boolean queries - const toRotate = [] - const nodeQueue = [tree] - while (nodeQueue.length) { - let node = nodeQueue.shift() - if (node.type === 'query' && node.children.find(child => child.type === 'connective')) { - let rightChild = node.children.find(child => child.type === 'query') - if (rightChild && rightChild.children.find(child => child.type === 'connective')) { - toRotate.push(node) - } - } - node.children.forEach(child => {nodeQueue.push(child)}) - } - console.log('toRotate: ', toRotate) - toRotate.forEach(node => { - console.log('rotating: ', node) - console.dir(tree, {depth: null}) - const lastChild = node.children[node.children.length - 1] - const grandChild = lastChild.children[0] - node.children[node.children.length - 1] = grandChild - lastChild[0] = node - }) -} - -function rectifySkeleton (tree) { - const connectives = ["AND", "OR"] - const toRotate = [] - const nodeQueue = [tree] - while (nodeQueue.length) { - let node = nodeQueue.shift() - if (node.find(child => connectives.includes(child))) { - let rightChild = node[node.length - 1] - if (rightChild.find(child => connectives.includes(child))) { - toRotate.push(node) - } - } - node.forEach(child => {nodeQueue.push(child)}) - } - console.log('toRotate: ', toRotate) - toRotate.forEach(node => { - console.log('rotating: ', node) - console.dir(tree, {depth: null}) - const lastChild = node.pop() - 
node.push(lastChild.shift()) - lastChild.unshift(node) - }) -} - function reverseString (string) { return string.split("").reverse().join("") } @@ -298,28 +81,6 @@ function reverseAST (tree) { return tree } -// let convenient_cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term | quoted_term | word -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` - -// const cqlParser = new Grammars.W3C.Parser(cql) -// const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) -const cqlParser = new Grammars.W3C.Parser(cql) -const ridicParser = new Grammars.W3C.Parser(ridic) const rightCqlParser = new Grammars.W3C.Parser(rightCql) function parseRight (string, parser) { @@ -329,6 +90,5 @@ function parseRight (string, parser) { function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -// const escapingParser = new Grammars.W3C.Parser(escaping) -module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars, parseRight, parseWithRightCql }//, escapingParser } +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql } From 3a03d816e30016aa1b9cce8a9a6c0cc0d42bc603 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 
11:54:38 -0500 Subject: [PATCH 50/82] Use parseWithRightCql in query builder and tests --- lib/elasticsearch/cql_query_builder.js | 8 +--- test/cql_grammar.test.js | 57 ++++++++++++++------------ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 44fc4e2e..6c2f5505 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,15 +1,11 @@ -const { cqlParser, rectifyTree } = require('./cql_grammar') +const { parseWithRightCql } = require('./cql_grammar') const ElasticQueryBuilder = require('./elastic-query-builder') const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - // const tree = cqlParser.getAST(cqlQuery) - // return buildEsQueryFromTree(tree) return buildEsQueryFromTree( - rectifyTree( - cqlParser.getAST(cqlQuery) - ) + parseWithRightCql(cqlQuery) ) } diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index 0a5439b9..cb3ee69d 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -1,6 +1,6 @@ const { expect } = require('chai') -const { cqlParser, simplify, rectifyTree } = require('../lib/elasticsearch/cql_grammar') +const { cqlParser, simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') function validateAtomicQuery(parsed, scope, relation, quotedTerm) { @@ -28,31 +28,31 @@ function validateBooleanQuery(parsed, expected) { describe.only('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { - validateAtomicQuery(cqlParser.getAST("title=\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") - 
validateAtomicQuery(cqlParser.getAST("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") }) it('allows whitespace variants', function () { - validateAtomicQuery(cqlParser.getAST("title =\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title= \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("title =\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title= \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("author adj 
\"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") }) it('correctly escapes escape characters', function () { - validateAtomicQuery(cqlParser.getAST("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") - validateAtomicQuery(cqlParser.getAST("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + validateAtomicQuery(parseWithRightCql("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") + validateAtomicQuery(parseWithRightCql("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") }) it('identifies words correctly', function () { - const parsed = cqlParser.getAST("keyword adj \"A multiword keyword\"") + const parsed = parseWithRightCql("keyword adj \"A multiword keyword\"") const words = [] let nodes = [parsed] while (nodes.length) { @@ -71,41 +71,44 @@ describe.only('CQL Grammar', function () { }) it('parses boolean queries', function () { - expect(simplify(cqlParser.getAST( + expect(simplify(parseWithRightCql( "title=\"dogs\" AND keyword=\"cats\"" ))).to.deep.equal( [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] ) - expect(simplify(cqlParser.getAST( + expect(simplify(parseWithRightCql( "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" ))).to.deep.equal( [ - [ - "title", "=", ["dogs"] - ], - "AND", [ [ - "keyword", "=", ["cats"] + "title", "=", ["dogs"] ], + "AND", + [ + "keyword", "=", ["cats"] + ] + ], "OR", [ "author", "adj", ["Bird"] ] ] - ] ) }) it('parses queries with parentheses', function () { - expect() + 
expect(simplify(parseWithRightCql( + "title=\"dogs\" AND (keyword=\"cats\" OR author adj \"Bird\")" + ))) .to.deep.equal( [ - [ [ 'title', '=', ['dogs'] ], 'AND', [ 'keyword', '=', ['cats'] ] ], + [ 'title', '=', ['dogs'] ], 'AND', [[ 'keyword', '=', ['cats'] ], 'OR', [ 'author', 'adj', [ 'Bird' ] ] ] + ] ) }) }) From 195282892d36962475516b2d4c5973054395a8e6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 11:59:21 -0500 Subject: [PATCH 51/82] Remove console log and commented code' --- lib/elasticsearch/cql_grammar.js | 1 - lib/elasticsearch/cql_query_builder.js | 9 --------- 2 files changed, 10 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index c9f73d6c..ef87de73 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -39,7 +39,6 @@ const rightCql = reverseGrammar(leftCql) function simplify (ast) { switch (ast.type) { case 'query': { - console.log('query') const children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) return children.length > 1 ? 
children : children[0] } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 6c2f5505..ed57fbfc 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -73,15 +73,6 @@ function findTopWords (tree) { return tree.children.map(child => findTopWords(child)).flatten() } -// function buildAtomic (scope, relation, term) { -// const request = ApiRequest.fromParams({ -// q: term, -// search_scope: scope -// }) -// const builder = ElasticQueryBuilder.forApiRequest(request) -// return builder.query.toJson() -// } - function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, ...Object.entries( From 5d3bf7db80ca405f5d04a84a49ac998f0c88b97d Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Fri, 6 Feb 2026 16:44:19 -0500 Subject: [PATCH 52/82] fix gha dep --- .github/workflows/test-and-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index 6423821a..90842d52 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -81,7 +81,7 @@ jobs: runs-on: ubuntu-latest needs: - tests - - inte + - integration-test-qa if: github.ref == 'refs/heads/qa2' steps: - name: Checkout repo From ff3261969f4082e2eaa92cbc6142c04b3c727596 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Fri, 6 Feb 2026 16:45:44 -0500 Subject: [PATCH 53/82] gha --- .github/workflows/test-and-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index 90842d52..afeb5470 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -21,7 +21,7 @@ jobs: contents: read runs-on: ubuntu-latest needs: tests - if: github.ref == 'refs/heads/qa' + if: github.ref == 'refs/heads/qa2' steps: - uses: actions/checkout@v4 - name: Set Node version From 
ffe1b303f11c75efaca17dd2b0e35fe0a050493f Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Mon, 9 Feb 2026 10:45:20 -0500 Subject: [PATCH 54/82] update gha invocation --- .github/workflows/test-and-deploy.yml | 2 +- package.json | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index afeb5470..579df541 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -33,7 +33,7 @@ jobs: - name: Start service run: ENV=qa npm start & - name: Run tests - run: npm run test-integration + run: node test/integration/delivery-locations-by-barcode.test.js deploy-qa: permissions: id-token: write diff --git a/package.json b/package.json index c4e083b7..8443ec2b 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,6 @@ }, "scripts": { "test": "./node_modules/.bin/standard --env mocha && NODE_ENV=test ./node_modules/.bin/mocha test --exit", - "test-integration": "./node_modules/.bin/mocha test/integration", "start": "node server.js", "deploy-development": "git checkout development && git pull origin development && eb deploy discovery-api-dev --profile nypl-sandbox", "deploy-qa": "git checkout qa && git pull origin qa && eb deploy discovery-api-qa --profile nypl-digital-dev", From c389071aec21660cf839cfbe8eec37fceb9fa393 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Mon, 9 Feb 2026 10:58:49 -0500 Subject: [PATCH 55/82] config aws in integration test --- .github/workflows/test-and-deploy.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index 579df541..ca14ff2b 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -23,6 +23,11 @@ jobs: needs: tests if: github.ref == 'refs/heads/qa2' steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: 
arn:aws:iam::946183545209:role/GithubActionsDeployerRole + aws-region: us-east-1 - uses: actions/checkout@v4 - name: Set Node version uses: actions/setup-node@v4 From 3a838575532cd1e9483d3171d0a5654413285bc9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 9 Feb 2026 15:39:00 -0500 Subject: [PATCH 56/82] Fix some param passing and start adding query tests --- lib/elasticsearch/cql_query_builder.js | 114 +++++++++++++++---------- test/cql_query_builder.test.js | 13 +++ test/fixtures/cql_fixtures.js | 78 +++++++++++++++++ 3 files changed, 158 insertions(+), 47 deletions(-) create mode 100644 test/cql_query_builder.test.js create mode 100644 test/fixtures/cql_fixtures.js diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index ed57fbfc..a026a229 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -4,9 +4,12 @@ const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - return buildEsQueryFromTree( - parseWithRightCql(cqlQuery) - ) + return { + query: + buildEsQueryFromTree( + parseWithRightCql(cqlQuery) + ) + } } /** @@ -16,18 +19,17 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { case 'query': - queries = tree.children.filter(child.type.contains('query')) + queries = tree.children.filter(child => child.type.includes('query')) connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { return buildBoolean(connectives[0], queries) } return buildEsQueryFromTree(queries[0]) case 'sub_query': - const query = tree.children.filter(child => child.type.contains('query'))[0] + const query = tree.children.filter(child => child.type.includes('query'))[0] return buildEsQueryFromTree(query) case 'atomic_query': { - const { scope, relation, term, terms } = atomicQueryParams(query) - return buildAtomic(scope, relation, term, 
terms) + return buildAtomic(atomicQueryParams(tree)) } default: break @@ -53,35 +55,51 @@ function buildNegation (queries) { } } -function atomicQueryParams (query) { +/** + A convienience method that collect the scope, relation, the full query (i.e term), and + all the separate words in the query (i.e. the terms) + */ +function atomicQueryParams (atomic_query) { return { - scope: query.find(child => child.type === 'scope'), - relation: query.find(child => child.type === 'relation'), - term: findTopPhrase(query), - terms: findTopWords(query) + scope: atomic_query.children.find(child => child.type === 'scope').text, + relation: atomic_query.children.find(child => child.type === 'relation').text, + term: findTopPhrase(atomic_query), + terms: findTopWords(atomic_query) } } +/** + Find the highest (i.e. most inclusive) phrase node and return its text + Ex: if the query was keyword="Hamlet Shakespeare", there will be phrase nodes + for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare + */ function findTopPhrase (tree) { if (tree.type === 'phrase') return tree.text const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) return topPhrases.length ? topPhrases[0] : null } +/** + Return a list of all the words that aren't fragments of larger words + E.g. Hamlet Shakespeare => [Hamlet, Shakespeare], and doesn't include the text + of word nodes for H, Ha, Ham, etc... 
+ */ function findTopWords (tree) { if (tree.type === 'word') return [tree.text] - return tree.children.map(child => findTopWords(child)).flatten() + return tree.children.map(child => findTopWords(child)).flat() } +/** + For an object where the keys are arrays, apply the given filter and map + to each of the arrays + */ function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, - ...Object.entries( - obj - ).filter(filter) // need to modify this to get full query - .map(map) - ) + ...(Object.entries(obj) + .map(([k,v]) => ({[k]: v.filter(filter).map(map) })) + ) + ) } - /** build atomic: - identify the scope fields that match the term @@ -97,47 +115,47 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in term matches with term fields */ - function buildAtomic (scope, relation, terms, term) { + function buildAtomic ({scope, relation, terms, term}) { const allFields = nestedMapAndFilter( indexMapping[scope], - (k,v) => typeof v === 'string' || v.on(terms), - ([k,v]) => ({[k] : typeof v === 'string' ? v : v.field}) + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? field : field.field) ) const bibFields = nestedMapAndFilter( allFields, - ([k, v]) => !['items, holdings'].any(prefix => k.startsWith(prefix)), - ([k, v]) => ({[k]: v}) + (field) => !['items, holdings'].some(prefix => field.startsWith(prefix)), + field => field ) const itemFields = nestedMapAndFilter( allFields, - ([k, v]) => k.startsWith('items'), - ([k, v]) => ({[k]: v}) + (field) => field.startsWith('items'), + field => field ) const holdingsFields = nestedMapAndFilter( allFields, - ([k, v]) => k.startsWith('holdings'), - ([k, v]) => ({[k]: v}) + (field) => field.startsWith('holdings'), + field => field ) return { - bool: { // should this start with query? 
+ bool: { should: [ - buildAtomicMain(bibFields, relation, terms, term), - buildAtomicNested('items', itemFields, relation, terms, term), - buildAtomicNested('holdings', holdingsFields, relation, terms, term) + buildAtomicMain({fields: bibFields, relation, terms, term}), + buildAtomicNested('items', {fields: itemFields, relation, terms, term}), + buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term}) ] } } } - function buildAtomicNested(name, fields, relation, terms, term) { + function buildAtomicNested(name, {fields, relation, terms, term}) { return { nested: { path: name, - query: buildAtomicMain(fields, relation, terms, term) + query: buildAtomicMain({fields, relation, terms, term}) } } } @@ -151,21 +169,21 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in prefix match with prefix fields - put all terms in term matches with term fields */ - function buildAtomicMain (fields, relation, terms, term) { + function buildAtomicMain ({fields, relation, terms, term}) { return { bool: { should: [ - ...anyAllQueries(fields, relation, terms, term), - ...adjEqQueries(fields, relation, terms, term), - ...termQueriesForTermFields(fields, relation, terms, term), - ...prefixQueriesForPrefixFields(fields, relation, terms, term) + ...anyAllQueries({fields, relation, terms, term}), + ...adjEqQueries({fields, relation, terms, term}), + ...termQueriesForTermFields({fields, relation, terms, term}), + ...prefixQueriesForPrefixFields({fields, relation, terms, term}) ] } } } - function anyAllQueries (fields, relation, terms, term) { - if (!['any', 'all'].contains(relation)) { return [] } + function anyAllQueries ({fields, relation, terms, term}) { + if (!['any', 'all'].includes(relation)) { return [] } const fieldsToUse = fields.fields return [ multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), @@ -173,8 +191,8 @@ function nestedMapAndFilter (obj, filter, map) { ] } - function adjEqQueries (fields, relation, 
terms, term) { - if (!['=', 'adj'].contains(relation)) { return [] } + function adjEqQueries ({fields, relation, terms, term}) { + if (!['=', 'adj'].includes(relation)) { return [] } const fieldsToUse = fields.fields return [ term.startsWith('^') ? @@ -183,12 +201,12 @@ function nestedMapAndFilter (obj, filter, map) { ] } - function prefixQueriesForPrefixFields (fields, relation, terms, term) { + function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { if (!fields.prefix) return [] return fields.prefix.map(field => prefixQuery(field, term)) } - function termQueriesForTermFields (fields, relation, terms, term) { + function termQueriesForTermFields ({fields, relation, terms, term}) { if (!fields.term) return [] return fields.term.map(field => termQuery(field, term)) } @@ -204,7 +222,7 @@ function nestedMapAndFilter (obj, filter, map) { function multiMatch (fields, relation, terms) { return { "multi_match": { - "query" : term, + "query" : terms.join(" "), "fields": fields, "type": "cross_fields", "operator": relation === "any" ? 
"or" : "and" @@ -238,5 +256,7 @@ module.exports = { buildEsQuery, buildEsQueryFromTree, buildBoolean, - buildAtomic + buildAtomic, + buildAtomicMain, + nestedMapAndFilter } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js new file mode 100644 index 00000000..3d924c62 --- /dev/null +++ b/test/cql_query_builder.test.js @@ -0,0 +1,13 @@ +const { expect } = require('chai') + +const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const { simpleAnyQuery } = require('./fixtures/cql_fixtures') + +// describe('CQL Query Builder', function () { +// it('Simple = query', function () { +// expect(buildEsQuery("title=\"Hamlet\"")) +// .to.deep.equal( +// simpleAnyQuery +// ) +// }) +// }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js new file mode 100644 index 00000000..b8318005 --- /dev/null +++ b/test/fixtures/cql_fixtures.js @@ -0,0 +1,78 @@ +const simpleAnyQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +module.exports = { + simpleAnyQuery +} From d2109f529ceea95a2beb40622531990dd67da5b7 Mon Sep 17 
00:00:00 2001 From: danamansana Date: Tue, 10 Feb 2026 12:00:15 -0500 Subject: [PATCH 57/82] Add tests for atomic queries and some small corrections --- lib/elasticsearch/cql_query_builder.js | 15 +- test/cql_grammar.test.js | 2 +- test/cql_query_builder.test.js | 93 +++- test/fixtures/cql_fixtures.js | 732 ++++++++++++++++++++++++- 4 files changed, 823 insertions(+), 19 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index a026a229..0054f5e3 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -61,8 +61,8 @@ function buildNegation (queries) { */ function atomicQueryParams (atomic_query) { return { - scope: atomic_query.children.find(child => child.type === 'scope').text, - relation: atomic_query.children.find(child => child.type === 'relation').text, + scope: atomic_query.children.find(child => child.type === 'scope').text.trim(), + relation: atomic_query.children.find(child => child.type === 'relation').text.trim(), term: findTopPhrase(atomic_query), terms: findTopWords(atomic_query) } @@ -124,7 +124,7 @@ function nestedMapAndFilter (obj, filter, map) { const bibFields = nestedMapAndFilter( allFields, - (field) => !['items, holdings'].some(prefix => field.startsWith(prefix)), + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), field => field ) @@ -187,8 +187,8 @@ function nestedMapAndFilter (obj, filter, map) { const fieldsToUse = fields.fields return [ multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => prefixQuery(fieldsToUse, term.slice(1)))) - ] + ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) + ].filter(q => q) } function adjEqQueries ({fields, relation, terms, term}) { @@ -198,7 +198,7 @@ function nestedMapAndFilter (obj, filter, map) { term.startsWith('^') ? 
phrasePrefixQuery(fieldsToUse, term.slice(1)) : phraseQuery(fieldsToUse, term) - ] + ].filter(q => q) } function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { @@ -220,6 +220,7 @@ function nestedMapAndFilter (obj, filter, map) { } function multiMatch (fields, relation, terms) { + if (!fields) return return { "multi_match": { "query" : terms.join(" "), @@ -231,6 +232,7 @@ function nestedMapAndFilter (obj, filter, map) { } function phrasePrefixQuery (fields, term) { + if (!fields) return return { "multi_match": { "query" : term, @@ -241,6 +243,7 @@ function nestedMapAndFilter (obj, filter, map) { } function phraseQuery (fields, term) { + if (!fields) return return { "multi_match": { "query" : term, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index cb3ee69d..38eafd70 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -25,7 +25,7 @@ function validateBooleanQuery(parsed, expected) { } -describe.only('CQL Grammar', function () { +describe('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 3d924c62..088070e0 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -1,13 +1,86 @@ const { expect } = require('chai') const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') -const { simpleAnyQuery } = require('./fixtures/cql_fixtures') - -// describe('CQL Query Builder', function () { -// it('Simple = query', function () { -// expect(buildEsQuery("title=\"Hamlet\"")) -// .to.deep.equal( -// simpleAnyQuery -// ) -// }) -// }) +const { + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery +} = 
require('./fixtures/cql_fixtures') + +describe.only('CQL Query Builder', function () { + it('Simple = query', function () { + expect(buildEsQuery("title=\"Hamlet\"")) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple adj query', function () { + expect(buildEsQuery("title adj \"Hamlet\"")) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple any query', function () { + expect(buildEsQuery("title any \"Hamlet Othello\"")) + .to.deep.equal( + simpleAnyQuery + ) + }) + + it('Simple all query', function () { + expect(buildEsQuery("title all \"Hamlet Othello\"")) + .to.deep.equal( + simpleAllQuery + ) + }) + + it('Prefix phrase query', function () { + expect(buildEsQuery("title = \"^The Tragedy of Hamlet, Prince of Denmark\"")) + .to.deep.equal( + prefixPhraseQuery + ) + }) + + it('Prefix queries mixed into any query', function () { + expect(buildEsQuery("title any \"^Tragedy ^Comedy Hamlet Othello\"")) + .to.deep.equal( + anyWithPrefixQuery + ) + }) + + it('Keyword query for barcode', function () { + expect(buildEsQuery("keyword = \"123456\"")) + .to.deep.equal( + keywordQueryForBarcode + ) + }) + + it('Keyword query for shelfMark', function () { + expect(buildEsQuery("keyword = \"B 12\"")) + .to.deep.equal( + keywordQueryForShelfMark + ) + }) + + it('Keyword query for general term', function () { + expect(buildEsQuery("keyword = \"Hamlet\"")) + .to.deep.equal( + keywordQueryForGeneralTerm + ) + }) + + it('Identifier query', function () { + expect(buildEsQuery("identifier = \"b1234\"")) + .to.deep.equal( + identifierQuery + ) + }) +}) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index b8318005..b5fb21eb 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,4 +1,4 @@ -const simpleAnyQuery = { +const simpleAdjQuery = { "query": { "bool": { "should": [ @@ -73,6 +73,734 @@ const simpleAnyQuery = { } } +const prefixPhraseQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + 
{ + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + } + ] + } + } +} + +const simpleAnyQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "or" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + 
"type": "cross_fields", + "operator": "or" + } + } + ] + } + } + } + } + ] + } + } +} + +const anyWithPrefixQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + }, + { + "multi_match": { + "query": "Comedy", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [], + "type": "phrase_prefix" + } + }, 
+ { + "multi_match": { + "query": "Comedy", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [], + "type": "phrase_prefix" + } + }, + { + "multi_match": { + "query": "Comedy", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + } + ] + } + } +} + +const simpleAllQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForBarcode = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + 
"note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [ + "items.idBarcode" + ], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForShelfMark = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + 
"bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [ + "items.shelfMark" + ], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForGeneralTerm = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const identifierQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "term": { + "uri": "b1234" + } + }, + { + "term": { + "idIsbn.clean": "b1234" + } + }, + { + "term": { + "idIssn.clean": "b1234" + } + }, + { + "prefix": { + "identifierV2.value": "b1234" + } + } + ] + } + }, 
+ { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "term": { + "items.idBarcode": "b1234" + } + }, + { + "prefix": { + "items.shelfMark.keywordLowercased": "b1234" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [] + } + } + } + } + ] + } + } +} + + module.exports = { - simpleAnyQuery + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery } From baf803c0b99d37d6dc13fef66a16061b23fbb5dc Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 10 Feb 2026 12:56:34 -0500 Subject: [PATCH 58/82] Add initial boolean tests --- lib/elasticsearch/cql/index-mapping.js | 10 +- lib/elasticsearch/cql_query_builder.js | 4 +- test/cql_query_builder.test.js | 30 +- test/fixtures/cql_fixtures.js | 514 ++++++++++++++++++++++++- 4 files changed, 549 insertions(+), 9 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index f0f19a9a..758deddc 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -68,15 +68,15 @@ const indexMapping = { subject: { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, - language: { field: ['language.id', 'language.label'] }, + language: { fields: ['language.id', 'language.label'] }, date: {}, series: { fields: ['series', 'parallelSeries'] }, - genre: { field: ['genreForm.raw'] }, - center: { field: ['buildingLocationIds'] }, - division: { field: ['collectionIds'] }, - format: { field: ['formatId'] } + genre: { fields: ['genreForm.raw'] }, + center: { fields: ['buildingLocationIds'] }, + division: { fields: ['collectionIds'] }, + format: { fields: ['formatId'] } } module.exports = { diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 0054f5e3..d2fa32e9 
100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -22,7 +22,7 @@ function buildEsQueryFromTree (tree) { queries = tree.children.filter(child => child.type.includes('query')) connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { - return buildBoolean(connectives[0], queries) + return buildBoolean(connectives[0].text, queries) } return buildEsQueryFromTree(queries[0]) case 'sub_query': @@ -38,7 +38,7 @@ function buildEsQueryFromTree (tree) { function buildBoolean (operator, queries) { if (operator === "NOT") return buildNegation(queries) - const esOperator = operator === 'and' ? 'must' : 'should' + const esOperator = operator === 'AND' ? 'must' : 'should' return { bool: { [esOperator]: queries.map(query => buildEsQueryFromTree(query)) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 088070e0..14f8e47b 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -10,7 +10,10 @@ const { keywordQueryForBarcode, keywordQueryForShelfMark, keywordQueryForGeneralTerm, - identifierQuery + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses } = require('./fixtures/cql_fixtures') describe.only('CQL Query Builder', function () { @@ -83,4 +86,29 @@ describe.only('CQL Query Builder', function () { identifierQuery ) }) + + it('Binary boolean query', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\"")) + .to.deep.equal( + binaryBooleanQuery + ) + }) + + it('Ternary boolean query', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\" OR genre = \"tragedy\"")) + .to.deep.equal( + ternaryBooleanQuery + ) + }) + + it('Boolean query with parentheses', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND (language = \"English\" OR genre = \"tragedy\")")) + .to.deep.equal( + queryWithParentheses + ) + }) + + it('Query 
with NOT') + + it('Query with AND NOT') }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index b5fb21eb..e4183d3e 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -792,6 +792,515 @@ const identifierQuery = { } } +const binaryBooleanQuery = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + +const ternaryBooleanQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + 
"parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [ + "genreForm.raw" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + +const queryWithParentheses = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + 
], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [ + "genreForm.raw" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } + ] + } + } +} module.exports = { simpleAdjQuery, @@ -802,5 +1311,8 @@ module.exports = { keywordQueryForBarcode, keywordQueryForShelfMark, keywordQueryForGeneralTerm, - identifierQuery + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses } From 8842a881fea2b438741f3c25ad4450df35bbd7ac Mon Sep 17 00:00:00 2001 From: danamansana Date: 
Wed, 11 Feb 2026 14:07:31 -0500 Subject: [PATCH 59/82] Add tests for negation --- test/cql_query_builder.test.js | 17 ++++- test/fixtures/cql_fixtures.js | 133 ++++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 4 deletions(-) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 14f8e47b..33f088d4 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -13,7 +13,8 @@ const { identifierQuery, binaryBooleanQuery, ternaryBooleanQuery, - queryWithParentheses + queryWithParentheses, + negationQuery } = require('./fixtures/cql_fixtures') describe.only('CQL Query Builder', function () { @@ -108,7 +109,17 @@ describe.only('CQL Query Builder', function () { ) }) - it('Query with NOT') + it('Query with NOT', function () { + expect(buildEsQuery("author = \"Shakespeare\" NOT language = \"English\"")) + .to.deep.equal( + negationQuery + ) + }) - it('Query with AND NOT') + it('Query with AND NOT', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND NOT language = \"English\"")) + .to.deep.equal( + negationQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index e4183d3e..101ff7be 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1302,6 +1302,136 @@ const queryWithParentheses = { } } +const negationQuery = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": 
[ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ], + "must_not": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1314,5 +1444,6 @@ module.exports = { identifierQuery, binaryBooleanQuery, ternaryBooleanQuery, - queryWithParentheses + queryWithParentheses, + negationQuery } From fd8ff377447f249ddf14c7be557f858859b47781 Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 11 Feb 2026 14:30:10 -0500 Subject: [PATCH 60/82] Fix linting/tests/small errors --- lib/elasticsearch/cql_grammar.js | 16 +- lib/elasticsearch/cql_query_builder.js | 250 ++-- test/cql_grammar.test.js | 95 +- test/cql_query_builder.test.js | 32 +- test/fixtures/cql_fixtures.js | 1570 ++++++++++++------------ 5 files changed, 978 insertions(+), 985 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index ef87de73..9d7700bb 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,17 +1,17 @@ const { Grammars } = require('ebnf') function reverseGrammar (grammar) { - return grammar.split("\n") + return grammar.split('\n') .map(line => - (line.split("::=") + (line.split('::=') .map(side => - (side.split("|") + (side.split('|') .map(dis => - (dis.split(" ") + (dis.split(' ') .map(word => - (word.includes("\"") ? 
word.split("").reverse().join("") : word)) - .reverse().join(" ")) - ).join("|"))).join("::= "))).join("\n") + (word.includes('"') ? word.split('').reverse().join('') : word)) + .reverse().join(' ')) + ).join('|'))).join('::= '))).join('\n') } const leftCql = ` @@ -71,7 +71,7 @@ function simplify (ast) { } function reverseString (string) { - return string.split("").reverse().join("") + return string.split('').reverse().join('') } function reverseAST (tree) { diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index d2fa32e9..fc5643eb 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,6 +1,4 @@ const { parseWithRightCql } = require('./cql_grammar') -const ElasticQueryBuilder = require('./elastic-query-builder') -const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { @@ -18,16 +16,18 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { - case 'query': - queries = tree.children.filter(child => child.type.includes('query')) - connectives = tree.children.filter(child => child.type === 'connective') + case 'query': { + const queries = tree.children.filter(child => child.type.includes('query')) + const connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { return buildBoolean(connectives[0].text, queries) } return buildEsQueryFromTree(queries[0]) - case 'sub_query': + } + case 'sub_query': { const query = tree.children.filter(child => child.type.includes('query'))[0] return buildEsQueryFromTree(query) + } case 'atomic_query': { return buildAtomic(atomicQueryParams(tree)) } @@ -37,7 +37,7 @@ function buildEsQueryFromTree (tree) { } function buildBoolean (operator, queries) { - if (operator === "NOT") return buildNegation(queries) + if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries) const 
esOperator = operator === 'AND' ? 'must' : 'should' return { bool: { @@ -59,12 +59,12 @@ function buildNegation (queries) { A convienience method that collect the scope, relation, the full query (i.e term), and all the separate words in the query (i.e. the terms) */ -function atomicQueryParams (atomic_query) { +function atomicQueryParams (atomicQuery) { return { - scope: atomic_query.children.find(child => child.type === 'scope').text.trim(), - relation: atomic_query.children.find(child => child.type === 'relation').text.trim(), - term: findTopPhrase(atomic_query), - terms: findTopWords(atomic_query) + scope: atomicQuery.children.find(child => child.type === 'scope').text.trim(), + relation: atomicQuery.children.find(child => child.type === 'relation').text.trim(), + term: findTopPhrase(atomicQuery), + terms: findTopWords(atomicQuery) } } @@ -96,9 +96,9 @@ function findTopWords (tree) { function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, ...(Object.entries(obj) - .map(([k,v]) => ({[k]: v.filter(filter).map(map) })) - ) + .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) ) + ) } /** build atomic: @@ -115,52 +115,52 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in term matches with term fields */ - function buildAtomic ({scope, relation, terms, term}) { - const allFields = nestedMapAndFilter( - indexMapping[scope], - field => typeof field === 'string' || field.on(term), - field => (typeof field === 'string' ? field : field.field) - ) +function buildAtomic ({ scope, relation, terms, term }) { + const allFields = nestedMapAndFilter( + indexMapping[scope], + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? 
field : field.field) + ) - const bibFields = nestedMapAndFilter( - allFields, - (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), - field => field - ) + const bibFields = nestedMapAndFilter( + allFields, + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), + field => field + ) - const itemFields = nestedMapAndFilter( - allFields, - (field) => field.startsWith('items'), - field => field - ) + const itemFields = nestedMapAndFilter( + allFields, + (field) => field.startsWith('items'), + field => field + ) - const holdingsFields = nestedMapAndFilter( - allFields, - (field) => field.startsWith('holdings'), - field => field - ) + const holdingsFields = nestedMapAndFilter( + allFields, + (field) => field.startsWith('holdings'), + field => field + ) - return { - bool: { - should: [ - buildAtomicMain({fields: bibFields, relation, terms, term}), - buildAtomicNested('items', {fields: itemFields, relation, terms, term}), - buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term}) - ] - } + return { + bool: { + should: [ + buildAtomicMain({ fields: bibFields, relation, terms, term }), + buildAtomicNested('items', { fields: itemFields, relation, terms, term }), + buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term }) + ] } - } + } +} - function buildAtomicNested(name, {fields, relation, terms, term}) { - return { - nested: { - path: name, - query: buildAtomicMain({fields, relation, terms, term}) - } - } - } +function buildAtomicNested (name, { fields, relation, terms, term }) { + return { + nested: { + path: name, + query: buildAtomicMain({ fields, relation, terms, term }) + } + } +} - /** +/** - main: - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields @@ -169,91 +169,89 @@ function nestedMapAndFilter (obj, filter, 
map) { - put all terms in prefix match with prefix fields - put all terms in term matches with term fields */ - function buildAtomicMain ({fields, relation, terms, term}) { - return { - bool: { - should: [ - ...anyAllQueries({fields, relation, terms, term}), - ...adjEqQueries({fields, relation, terms, term}), - ...termQueriesForTermFields({fields, relation, terms, term}), - ...prefixQueriesForPrefixFields({fields, relation, terms, term}) - ] - } - } - } +function buildAtomicMain ({ fields, relation, terms, term }) { + return { + bool: { + should: [ + ...anyAllQueries({ fields, relation, terms, term }), + ...adjEqQueries({ fields, relation, terms, term }), + ...termQueriesForTermFields({ fields, relation, terms, term }), + ...prefixQueriesForPrefixFields({ fields, relation, terms, term }) + ] + } + } +} - function anyAllQueries ({fields, relation, terms, term}) { - if (!['any', 'all'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) - ].filter(q => q) - } +function anyAllQueries ({ fields, relation, terms, term }) { + if (!['any', 'all'].includes(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), + ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) + ].filter(q => q) +} - function adjEqQueries ({fields, relation, terms, term}) { - if (!['=', 'adj'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - term.startsWith('^') ? 
- phrasePrefixQuery(fieldsToUse, term.slice(1)) : - phraseQuery(fieldsToUse, term) - ].filter(q => q) - } +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', 'adj'].includes(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + term.startsWith('^') + ? phrasePrefixQuery(fieldsToUse, term.slice(1)) + : phraseQuery(fieldsToUse, term) + ].filter(q => q) +} - function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { - if (!fields.prefix) return [] - return fields.prefix.map(field => prefixQuery(field, term)) - } +function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { + if (!fields.prefix) return [] + return fields.prefix.map(field => prefixQuery(field, term)) +} - function termQueriesForTermFields ({fields, relation, terms, term}) { - if (!fields.term) return [] - return fields.term.map(field => termQuery(field, term)) - } +function termQueriesForTermFields ({ fields, relation, terms, term }) { + if (!fields.term) return [] + return fields.term.map(field => termQuery(field, term)) +} - function termQuery (field, term) { - return { "term" : { [field] : term } } - } +function termQuery (field, term) { + return { term: { [field]: term } } +} - function prefixQuery (field, term) { - return { "prefix" : { [field] : term } } - } +function prefixQuery (field, term) { + return { prefix: { [field]: term } } +} - function multiMatch (fields, relation, terms) { +function multiMatch (fields, relation, terms) { if (!fields) return - return { - "multi_match": { - "query" : terms.join(" "), - "fields": fields, - "type": "cross_fields", - "operator": relation === "any" ? "or" : "and" - } - } - } + return { + multi_match: { + query: terms.join(' '), + fields, + type: 'cross_fields', + operator: relation === 'any' ? 
'or' : 'and' + } + } +} - function phrasePrefixQuery (fields, term) { +function phrasePrefixQuery (fields, term) { if (!fields) return - return { - "multi_match": { - "query" : term, - "fields": fields, - "type": "phrase_prefix" - } - } - } + return { + multi_match: { + query: term, + fields, + type: 'phrase_prefix' + } + } +} - function phraseQuery (fields, term) { +function phraseQuery (fields, term) { if (!fields) return - return { - "multi_match": { - "query" : term, - "fields": fields, - "type": "phrase" - } - } - } - - + return { + multi_match: { + query: term, + fields, + type: 'phrase' + } + } +} module.exports = { buildEsQuery, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index 38eafd70..24d5d5d6 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -1,69 +1,64 @@ const { expect } = require('chai') -const { cqlParser, simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') +const { simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') - -function validateAtomicQuery(parsed, scope, relation, quotedTerm) { - expect(parsed.type).to.equal("query") +function validateAtomicQuery (parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal('query') expect(parsed.children.length).to.equal(1) const subQuery = parsed.children[0] - expect(subQuery.type).to.equal("sub_query") + expect(subQuery.type).to.equal('sub_query') expect(subQuery.children.length).to.equal(1) const atomicQuery = subQuery.children[0] - expect(atomicQuery.type).to.equal("atomic_query") - const scopeNode = atomicQuery.children.find(child => child.type === "scope") - const scopeTerm = scopeNode.children.find(child => child.type === "scope_term") + expect(atomicQuery.type).to.equal('atomic_query') + const scopeNode = atomicQuery.children.find(child => child.type === 'scope') + const scopeTerm = scopeNode.children.find(child => child.type === 'scope_term') expect(scopeTerm.text).to.equal(scope) - const 
relationNode = atomicQuery.children.find(child => child.type === "relation") - const relationTerm = relationNode.children.find(child => child.type === "relation_term") + const relationNode = atomicQuery.children.find(child => child.type === 'relation') + const relationTerm = relationNode.children.find(child => child.type === 'relation_term') expect(relationTerm.text).to.equal(relation) - const quotedTermNode = atomicQuery.children.find(child => child.type === "quoted_term") + const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term') expect(quotedTermNode.text).to.equal(quotedTerm) } -function validateBooleanQuery(parsed, expected) { - -} - -describe('CQL Grammar', function () { +describe('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { - validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") - validateAtomicQuery(parseWithRightCql("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql('title="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('keyword any "hamlet shakespeare"'), 'keyword', 'any', '"hamlet shakespeare"') + validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"') }) it('allows whitespace variants', function () { - validateAtomicQuery(parseWithRightCql("title =\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title= \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = 
\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') }) it('correctly escapes escape characters', function () { - validateAtomicQuery(parseWithRightCql("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") - validateAtomicQuery(parseWithRightCql("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + validateAtomicQuery(parseWithRightCql('keyword="Notes on \\"The Underground\\""'), 'keyword', '=', '"Notes on \\"The Underground\\""') + 
validateAtomicQuery(parseWithRightCql('title="This title ends in a slash \\\\"'), 'title', '=', '"This title ends in a slash \\\\"') }) it('identifies words correctly', function () { - const parsed = parseWithRightCql("keyword adj \"A multiword keyword\"") + const parsed = parseWithRightCql('keyword adj "A multiword keyword"') const words = [] let nodes = [parsed] while (nodes.length) { - let node = nodes.shift() - if (node.type === "word") { + const node = nodes.shift() + if (node.type === 'word') { words.push(node.text) } else { nodes = nodes.concat(node.children) } } - const expectedWords = ["A", "multiword", "keyword"] + const expectedWords = ['A', 'multiword', 'keyword'] words.forEach(word => { expect(expectedWords).to.include(word) }) @@ -72,43 +67,43 @@ describe('CQL Grammar', function () { it('parses boolean queries', function () { expect(simplify(parseWithRightCql( - "title=\"dogs\" AND keyword=\"cats\"" + 'title="dogs" AND keyword="cats"' ))).to.deep.equal( - [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] + [['title', '=', ['dogs']], 'AND', ['keyword', '=', ['cats']]] ) expect(simplify(parseWithRightCql( - "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" + 'title="dogs" AND keyword="cats" OR author adj "Bird"' ))).to.deep.equal( [ [ [ - "title", "=", ["dogs"] + 'title', '=', ['dogs'] ], - "AND", + 'AND', [ - "keyword", "=", ["cats"] + 'keyword', '=', ['cats'] ] ], - "OR", - [ - "author", "adj", ["Bird"] - ] + 'OR', + [ + 'author', 'adj', ['Bird'] ] + ] ) }) it('parses queries with parentheses', function () { expect(simplify(parseWithRightCql( - "title=\"dogs\" AND (keyword=\"cats\" OR author adj \"Bird\")" + 'title="dogs" AND (keyword="cats" OR author adj "Bird")' ))) .to.deep.equal( [ - [ 'title', '=', ['dogs'] ], 'AND', [[ 'keyword', '=', ['cats'] ], - 'OR', - [ 'author', 'adj', [ 'Bird' ] ] + ['title', '=', ['dogs']], 'AND', [['keyword', '=', ['cats']], + 'OR', + ['author', 'adj', ['Bird']] + ] ] - ] ) }) }) diff 
--git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 33f088d4..0dfdc599 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -17,107 +17,107 @@ const { negationQuery } = require('./fixtures/cql_fixtures') -describe.only('CQL Query Builder', function () { +describe('CQL Query Builder', function () { it('Simple = query', function () { - expect(buildEsQuery("title=\"Hamlet\"")) + expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery ) }) it('Simple adj query', function () { - expect(buildEsQuery("title adj \"Hamlet\"")) + expect(buildEsQuery('title adj "Hamlet"')) .to.deep.equal( simpleAdjQuery ) }) it('Simple any query', function () { - expect(buildEsQuery("title any \"Hamlet Othello\"")) + expect(buildEsQuery('title any "Hamlet Othello"')) .to.deep.equal( simpleAnyQuery ) }) it('Simple all query', function () { - expect(buildEsQuery("title all \"Hamlet Othello\"")) + expect(buildEsQuery('title all "Hamlet Othello"')) .to.deep.equal( simpleAllQuery ) }) it('Prefix phrase query', function () { - expect(buildEsQuery("title = \"^The Tragedy of Hamlet, Prince of Denmark\"")) + expect(buildEsQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"')) .to.deep.equal( prefixPhraseQuery ) }) it('Prefix queries mixed into any query', function () { - expect(buildEsQuery("title any \"^Tragedy ^Comedy Hamlet Othello\"")) + expect(buildEsQuery('title any "^Tragedy ^Comedy Hamlet Othello"')) .to.deep.equal( anyWithPrefixQuery ) }) it('Keyword query for barcode', function () { - expect(buildEsQuery("keyword = \"123456\"")) + expect(buildEsQuery('keyword = "123456"')) .to.deep.equal( keywordQueryForBarcode ) }) it('Keyword query for shelfMark', function () { - expect(buildEsQuery("keyword = \"B 12\"")) + expect(buildEsQuery('keyword = "B 12"')) .to.deep.equal( keywordQueryForShelfMark ) }) it('Keyword query for general term', function () { - expect(buildEsQuery("keyword = \"Hamlet\"")) + 
expect(buildEsQuery('keyword = "Hamlet"')) .to.deep.equal( keywordQueryForGeneralTerm ) }) it('Identifier query', function () { - expect(buildEsQuery("identifier = \"b1234\"")) + expect(buildEsQuery('identifier = "b1234"')) .to.deep.equal( identifierQuery ) }) it('Binary boolean query', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" AND language = "English"')) .to.deep.equal( binaryBooleanQuery ) }) it('Ternary boolean query', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\" OR genre = \"tragedy\"")) + expect(buildEsQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"')) .to.deep.equal( ternaryBooleanQuery ) }) it('Boolean query with parentheses', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND (language = \"English\" OR genre = \"tragedy\")")) + expect(buildEsQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")')) .to.deep.equal( queryWithParentheses ) }) it('Query with NOT', function () { - expect(buildEsQuery("author = \"Shakespeare\" NOT language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" NOT language = "English"')) .to.deep.equal( negationQuery ) }) it('Query with AND NOT', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND NOT language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" AND NOT language = "English"')) .to.deep.equal( negationQuery ) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 101ff7be..3c702c1f 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,48 +1,48 @@ const simpleAdjQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - 
"titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -51,16 +51,16 @@ const simpleAdjQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -74,50 +74,50 @@ const simpleAdjQuery = { } const prefixPhraseQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - 
"parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' } } ] @@ -126,16 +126,16 @@ const prefixPhraseQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' } } ] @@ -149,52 +149,52 @@ const prefixPhraseQuery = { } const simpleAnyQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - 
"parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "or" + type: 'cross_fields', + operator: 'or' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } } ] @@ -203,17 +203,17 @@ const simpleAnyQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } } ] @@ -227,114 +227,114 @@ const simpleAnyQuery = { } const anyWithPrefixQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - 
"parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "or" + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Tragedy', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - 
"parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Comedy', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' } } ] @@ -343,31 +343,31 @@ const anyWithPrefixQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Comedy', + fields: [], + 
type: 'phrase_prefix' } } ] @@ -381,52 +381,52 @@ const anyWithPrefixQuery = { } const simpleAllQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "and" + type: 'cross_fields', + operator: 'and' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "and" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' } } ] @@ -435,17 +435,17 @@ const simpleAllQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "and" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' 
} } ] @@ -459,65 +459,65 @@ const simpleAllQuery = { } const keywordQueryForBarcode = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "123456", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: '123456', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - 
"query": "123456", - "fields": [ - "items.idBarcode" + multi_match: { + query: '123456', + fields: [ + 'items.idBarcode' ], - "type": "phrase" + type: 'phrase' } } ] @@ -526,16 +526,16 @@ const keywordQueryForBarcode = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "123456", - "fields": [], - "type": "phrase" + multi_match: { + query: '123456', + fields: [], + type: 'phrase' } } ] @@ -549,65 +549,65 @@ const keywordQueryForBarcode = { } const keywordQueryForShelfMark = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: 'B 12', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', 
+ 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [ - "items.shelfMark" + multi_match: { + query: 'B 12', + fields: [ + 'items.shelfMark' ], - "type": "phrase" + type: 'phrase' } } ] @@ -616,16 +616,16 @@ const keywordQueryForShelfMark = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [], - "type": "phrase" + multi_match: { + query: 'B 12', + fields: [], + type: 'phrase' } } ] @@ -639,63 +639,63 @@ const keywordQueryForShelfMark = { } const keywordQueryForGeneralTerm = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - 
"addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -704,16 +704,16 @@ const keywordQueryForGeneralTerm = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -727,49 +727,49 @@ const keywordQueryForGeneralTerm = { } const identifierQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "term": { - "uri": "b1234" + term: { + uri: 'b1234' } }, { - "term": { - "idIsbn.clean": "b1234" + term: { + 'idIsbn.clean': 'b1234' } }, { - "term": { - "idIssn.clean": "b1234" + term: { + 'idIssn.clean': 'b1234' } }, { - "prefix": { - "identifierV2.value": 
"b1234" + prefix: { + 'identifierV2.value': 'b1234' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "term": { - "items.idBarcode": "b1234" + term: { + 'items.idBarcode': 'b1234' } }, { - "prefix": { - "items.shelfMark.keywordLowercased": "b1234" + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' } } ] @@ -778,11 +778,11 @@ const identifierQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [] + nested: { + path: 'holdings', + query: { + bool: { + should: [] } } } @@ -793,42 +793,42 @@ const identifierQuery = { } const binaryBooleanQuery = { - "query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -837,16 +837,16 @@ const binaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -858,35 
+858,35 @@ const binaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -895,16 +895,16 @@ const binaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -921,45 +921,45 @@ const binaryBooleanQuery = { } const ternaryBooleanQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "must": [ + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + 
multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -968,16 +968,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -989,35 +989,35 @@ const ternaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1026,16 +1026,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1050,34 +1050,34 @@ const ternaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [ - "genreForm.raw" + multi_match: { + query: 'tragedy', + fields: [ + 'genreForm.raw' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - 
"fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1086,16 +1086,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1112,42 +1112,42 @@ const ternaryBooleanQuery = { } const queryWithParentheses = { - "query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1156,16 +1156,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1177,38 +1177,38 @@ const queryWithParentheses = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ 
{ - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1217,16 +1217,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1238,34 +1238,34 @@ const queryWithParentheses = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [ - "genreForm.raw" + multi_match: { + query: 'tragedy', + fields: [ + 'genreForm.raw' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1274,16 +1274,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1303,42 +1303,42 @@ const queryWithParentheses = { } const negationQuery = { - 
"query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1347,16 +1347,16 @@ const negationQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1368,37 +1368,37 @@ const negationQuery = { } } ], - "must_not": [ + must_not: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1407,16 
+1407,16 @@ const negationQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1432,7 +1432,7 @@ const negationQuery = { } } -module.exports = { +module.exports = { simpleAdjQuery, simpleAnyQuery, simpleAllQuery, From 271c692c34c36f7d9c93a6f1ebea52b98612073a Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 11:10:22 -0500 Subject: [PATCH 61/82] Add date queries --- lib/elasticsearch/cql/index-mapping.js | 2 +- lib/elasticsearch/cql_query_builder.js | 37 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 758deddc..a73f1e55 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -69,7 +69,7 @@ const indexMapping = { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, language: { fields: ['language.id', 'language.label'] }, - date: {}, + date: { fields: ['dates.range'] }, series: { fields: ['series', 'parallelSeries'] }, diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fc5643eb..e9985b95 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -176,7 +176,8 @@ function buildAtomicMain ({ fields, relation, terms, term }) { ...anyAllQueries({ fields, relation, terms, term }), ...adjEqQueries({ fields, relation, terms, term }), ...termQueriesForTermFields({ fields, relation, terms, term }), - ...prefixQueriesForPrefixFields({ fields, relation, terms, term }) + ...prefixQueriesForPrefixFields({ fields, relation, terms, term }), + ...dateQueries({ fields, relation, terms, term }) ] } } @@ -201,6 +202,40 @@ 
function adjEqQueries ({ fields, relation, terms, term }) { ].filter(q => q) } +function dateQueries ({ fields, relation, terms, term }) { + if (!fields.some(field => field.includes('date'))) { return [] } + let range + switch (relation) { + case "<": + range = { lt: terms[0] } + case ">": + range = { gt: terms[0] } + case ">=": + range = { gte: terms[0] } + case "<=": + range = { lte: terms[0] } + case "encloses": + range = { gt: terms[0], lt: terms[1] } + case "within": + range = { gte: terms[0], lte: terms[1] } + default: + break + } + + return [ + { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range + } + } + } + } + ] +} + function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { if (!fields.prefix) return [] return fields.prefix.map(field => prefixQuery(field, term)) From ffe585f221da4585f801de2d00baabfd8b171153 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 11:49:09 -0500 Subject: [PATCH 62/82] Add filters to cql query builder --- lib/elasticsearch/cql_query_builder.js | 26 +++++++++++++++++----- lib/elasticsearch/elastic-query-builder.js | 2 ++ lib/resources.js | 5 +++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index e9985b95..7bf98705 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,13 +1,29 @@ const { parseWithRightCql } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') +const ElasticQueryBuilder = require('./elastic-query-builder') -function buildEsQuery (cqlQuery) { +function buildEsQuery (cqlQuery, request) { + const filterQuery = filterQuery(request) return { - query: - buildEsQueryFromTree( - parseWithRightCql(cqlQuery) - ) + query: { + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery) + ) + ] + }, + ...filterQuery + } + } +} + +function filterQuery (request) { + const queryJson = 
ElasticQueryBuilder.forApiRequest(request).query.toJson() + if (queryJson.bool && queryJson.bool.filter) { + return { filter: queryJson.bool.filter } } + return {} } /** diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index d561795a..28336c9d 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -36,6 +36,8 @@ class ElasticQueryBuilder { case 'callnumber': this.buildCallnumberQuery() break + case 'cql': + break case 'all': default: this.buildAllQuery() diff --git a/lib/resources.js b/lib/resources.js index d6a8ec3c..5ea637e7 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -888,11 +888,12 @@ const buildElasticBody = function (params) { * @return {object} ES query object suitable to be POST'd to ES endpoint */ const buildElasticQuery = function (params) { + const request = ApiRequest.fromParams(params) if (params.search_scope === 'cql') { - const query = cqlQueryBuilder.buildEsQuery(params.q) + const query = cqlQueryBuilder.buildEsQuery(params.q, request) return query } - const request = ApiRequest.fromParams(params) + console.log('request: ', request) const builder = ElasticQueryBuilder.forApiRequest(request) return builder.query.toJson() From 59901659d28e5b19ee764364a79516a7c02f53f9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 13:23:39 -0500 Subject: [PATCH 63/82] Remove irrelevant code --- lib/elasticsearch/elastic-body-builder.js | 4 -- lib/resources.js | 20 +++----- lib/utils/resource-helpers.js | 6 +-- test/resources.test.js | 60 +++++++++++++---------- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js index 110917b1..aa5c45a9 100644 --- a/lib/elasticsearch/elastic-body-builder.js +++ b/lib/elasticsearch/elastic-body-builder.js @@ -30,11 +30,8 @@ const bodyForFindByUri = function (recapBarcodesByStatus, params) { 
unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] } - // const filter = returnAllItems ? {} : { filter: [] } - const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } - // Establish base query: const body = { _source: { excludes @@ -116,7 +113,6 @@ const bodyForSearch = function (params) { } ) - // return withInnerHits return body } diff --git a/lib/resources.js b/lib/resources.js index fb8c1694..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -30,10 +30,6 @@ const { const { bodyForFindByUri, - itemsFilterContext, - itemsQueryContext, - buildElasticQuery, - buildElasticBody, bodyForSearch, aggregationQueriesForParams, bodyForAggregation @@ -64,7 +60,7 @@ module.exports = function (app, _private = null) { }) // Validate uri: - await nyplSourceAndId(params) + await nyplSourceAndId(params.uri) // If we need to return itemAggregations or filter on item_status, // then we need to pre-retrieve SCSB item statuses to incorporate them into @@ -82,7 +78,7 @@ module.exports = function (app, _private = null) { } } - const body = await bodyForFindByUri(recapBarcodesByStatus, params) + const body = bodyForFindByUri(recapBarcodesByStatus, params) app.logger.debug('Resources#findByUri', body) let resp = await app.esClient.search(body) // Mindfully throw errors for known issues: @@ -107,7 +103,7 @@ module.exports = function (app, _private = null) { app.resources.annotatedMarc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const { id, nyplSource } = await nyplSourceAndId(params) + const { id, nyplSource } = await nyplSourceAndId(params.uri) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) @@ -117,13 +113,13 @@ module.exports = function (app, _private = null) { throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) } - return await AnnotatedMarcSerializer.serialize(resp.data) + return AnnotatedMarcSerializer.serialize(resp.data) } // Get a 
single raw marc: app.resources.marc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const { id, nyplSource } = await nyplSourceAndId(params) + const { id, nyplSource } = await nyplSourceAndId(params.uri) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) @@ -133,7 +129,7 @@ module.exports = function (app, _private = null) { throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) } - return await MarcSerializer.serialize(resp.data) + return MarcSerializer.serialize(resp.data) } // Get deliveryLocations for given resource(s) @@ -259,12 +255,8 @@ module.exports = function (app, _private = null) { // For unit testing, export private methods if second arg given: if (_private && typeof _private === 'object') { - _private.buildElasticBody = buildElasticBody - _private.buildElasticQuery = buildElasticQuery _private.parseSearchParams = parseSearchParams _private.esRangeValue = esRangeValue - _private.itemsFilterContext = itemsFilterContext - _private.itemsQueryContext = itemsQueryContext _private.aggregationQueriesForParams = aggregationQueriesForParams _private.mergeAggregationsResponses = mergeAggregationsResponses } diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js index 9ba7ea9d..50b6c9c5 100644 --- a/lib/utils/resource-helpers.js +++ b/lib/utils/resource-helpers.js @@ -60,11 +60,11 @@ const parseSearchParams = function (params, overrideParams = {}) { }) } -const nyplSourceAndId = async function (params) { +const nyplSourceAndId = async function (uri) { const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} + const { id, nyplSource } = nyplSourceMapper.splitIdentifier(uri) ?? 
{} if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) + throw new errors.InvalidParameterError(`Invalid bnum: ${uri}`) } return { id, nyplSource } } diff --git a/test/resources.test.js b/test/resources.test.js index 98220b26..ebc3c313 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -5,6 +5,12 @@ const scsbClient = require('../lib/scsb-client') const errors = require('../lib/errors') const { AGGREGATIONS_SPEC } = require('../lib/elasticsearch/config') const numAggregations = Object.keys(AGGREGATIONS_SPEC).length +const { + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody +} = require('../lib/elasticsearch/elastic-body-builder') const fixtures = require('./fixtures') @@ -72,7 +78,7 @@ describe('Resources query', function () { describe('buildElasticQuery', function () { it('uses "query string query" if subjectLiteral: used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -83,7 +89,7 @@ describe('Resources query', function () { it('uses "query string query" if subjectLiteral: quoted phrase used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -94,7 +100,7 @@ describe('Resources query', function () { it('escapes colon if field not recognized', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'fladeedle:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = 
buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -105,7 +111,7 @@ describe('Resources query', function () { it('uses "query string query" if plain keyword query used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -116,7 +122,7 @@ describe('Resources query', function () { it('accepts advanced search parameters', function () { const params = resourcesPrivMethods.parseSearchParams({ contributor: 'Poe', title: 'Raven', subject: 'ravens' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.nested.include({ // Expect a title match on Raven: @@ -135,7 +141,7 @@ describe('Resources query', function () { describe('buildElasticBody', function () { it('uses subjectLiteral.raw when given a subjectLiteral filter', function () { const params = resourcesPrivMethods.parseSearchParams({ q: '', filters: { subjectLiteral: 'United States -- History' } }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query).to.be.a('object') expect(body.query.bool).to.be.a('object') @@ -151,7 +157,7 @@ describe('Resources query', function () { expect(process.env.HIDE_NYPL_SOURCE).to.be.a('undefined') const params = resourcesPrivMethods.parseSearchParams({ q: '' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query.filter).to.be.a('undefined') @@ -161,7 +167,7 @@ describe('Resources query', function () { process.env.HIDE_NYPL_SOURCE = 'recap-hl' const params = resourcesPrivMethods.parseSearchParams({ q: '' 
}) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) // Expect query to resemble: {"from":0,"size":50,"query":{"bool":{"filter":[{"bool":{"must_not":{"terms":{"nyplSource":["recap-hl"]}}}}]}},"sort":["uri"]} expect(body).to.be.a('object') @@ -173,26 +179,26 @@ describe('Resources query', function () { it('processes isbn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ isbn: '0689844921' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested .include({ 'query.bool.must[0].term.idIsbn\\.clean': '0689844921' }) }) it('processes issn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ issn: '1234-5678' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idIssn\\.clean': '1234-5678' }) }) it('processes lccn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ lccn: '00068799' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].regexp.idLccn.value': '[^\\d]*00068799[^\\d]*' }) }) it('processes oclc correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ oclc: '1033548057' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idOclc': '1033548057' }) }) @@ -205,9 +211,9 @@ describe('Resources query', function () { }) const paramsSnapshot = JSON.stringify(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) expect(JSON.stringify(params)).to.equal(paramsSnapshot) }) 
@@ -648,40 +654,40 @@ describe('Resources query', function () { describe('itemsFilterContext', () => { it('should return an empty object in case of no query', () => { - expect(resourcesPrivMethods.itemsFilterContext({})).to.deep.equal({}) + expect(itemsFilterContext({})).to.deep.equal({}) }) it('should return an empty object in case there are no filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: {} })).to.deep.equal({}) + expect(itemsFilterContext({ query: {} })).to.deep.equal({}) }) it('should return filters for volume in case there is a volume', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { volume: [1, 2] } })) + expect(itemsFilterContext({ query: { volume: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.volumeRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for date in case there is a date', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { date: [1, 2] } })) + expect(itemsFilterContext({ query: { date: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.dateRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for format in case there is a format', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) + expect(itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) .to.deep.equal({ filter: [{ terms: { 'items.formatLiteral': ['text', 'microfilm', 'AV'] } }] }) }) it('should return filters for location in case there is a location', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) it('should return filters for status in case there is a status', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { status: ['Available', 
'Unavailable', 'In Process'] } })) + expect(itemsFilterContext({ query: { status: ['Available', 'Unavailable', 'In Process'] } })) .to.deep.equal({ filter: [{ terms: { 'items.status.id': ['Available', 'Unavailable', 'In Process'] } }] }) }) it('should combine all filters in case of multiple filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ + expect(itemsFilterContext({ query: { volume: [1, 2], date: [3, 4], @@ -701,24 +707,24 @@ describe('Resources query', function () { }) it('should ignore all other parameters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) }) describe('itemsQueryContext', () => { it('should exclude check in card items when options.merge_checkin_card_items is not set', () => { - expect(resourcesPrivMethods.itemsQueryContext({})) + expect(itemsQueryContext({})) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should exclude check in card items when merge_checkin_card_items is falsey', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: false })) + expect(itemsQueryContext({ merge_checkin_card_items: false })) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should use match_all for items when merge_checkin_card_items is truthy', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: true })) + expect(itemsQueryContext({ merge_checkin_card_items: true })) .to.deep.equal({ must: { match_all: {} } }) }) }) From 020c75db0f295950b79e8f6b61c0be62fdc0f80e Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 14:25:18 -0500 Subject: [PATCH 64/82] Remove spurious merge code --- lib/resources.js | 
69 ------------------------------------------------ 1 file changed, 69 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 0a055238..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -37,75 +37,6 @@ const { const RESOURCES_INDEX = process.env.RESOURCES_INDEX -<<<<<<< scc-5050-2 -======= -const ITEM_FILTER_AGGREGATIONS = { - item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, - item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, - item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } -} - -// Configure sort fields: -const SORT_FIELDS = { - title: { - initialDirection: 'asc', - field: 'title_sort' - }, - date: { - initialDirection: 'desc', - field: 'dateStartYear' - }, - creator: { - initialDirection: 'asc', - field: 'creator_sort' - }, - relevance: {} -} - -// The following fields can be excluded from ES responses because we don't pass them to client: -const EXCLUDE_FIELDS = [ - 'uris', - '*_packed', - '*_sort', - 'items.*_packed', - 'contentsTitle', - 'suppressed', - // Hide contributor and creator transformed fields: - '*WithoutDates', - '*Normalized' -] - -// Configure controller-wide parameter parsing: -const parseSearchParams = function (params, overrideParams = {}) { - return parseParams(params, { - q: { type: 'string' }, - page: { type: 'int', default: 1 }, - per_page: { type: 'int', default: 50, range: [0, 100] }, - field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, - sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, - sort_direction: { type: 'string', range: ['asc', 'desc'] }, - search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, - filters: { type: 'hash', fields: FILTER_CONFIG }, - items_size: { type: 'int', default: 100, range: [0, 200] }, - 
items_from: { type: 'int', default: 0 }, - callnumber: { type: 'string' }, - standard_number: { type: 'string' }, - contributor: { type: 'string' }, - title: { type: 'string' }, - subject: { type: 'string' }, - subject_prefix: { type: 'string' }, - isbn: { type: 'string' }, - issn: { type: 'string' }, - lccn: { type: 'string' }, - oclc: { type: 'string' }, - role: { type: 'string' }, - merge_checkin_card_items: { type: 'boolean', default: true }, - include_item_aggregations: { type: 'boolean', default: true }, - ...overrideParams - }) -} - ->>>>>>> main // These are the handlers made available to the router: module.exports = function (app, _private = null) { app.resources = {} From 1473e3d874cb2b7f06e53c57446a6e6bb071199a Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 14:52:27 -0500 Subject: [PATCH 65/82] Fix some small errors --- lib/elasticsearch/cql_query_builder.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 7bf98705..cc5c36ef 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -3,22 +3,20 @@ const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') function buildEsQuery (cqlQuery, request) { - const filterQuery = filterQuery(request) + const filterQuery = buildFilterQuery(request) return { - query: { bool: { should: [ buildEsQueryFromTree( - parseWithRightCql(cqlQuery) + parseWithRightCql(cqlQuery.trim()) ) ] }, ...filterQuery - } } } -function filterQuery (request) { +function buildFilterQuery (request) { const queryJson = ElasticQueryBuilder.forApiRequest(request).query.toJson() if (queryJson.bool && queryJson.bool.filter) { return { filter: queryJson.bool.filter } @@ -219,7 +217,7 @@ function adjEqQueries ({ fields, relation, terms, term }) { } function dateQueries ({ fields, relation, terms, term }) { - if 
(!fields.some(field => field.includes('date'))) { return [] } + if (!fields.fields.some(field => field.includes('date'))) { return [] } let range switch (relation) { case "<": From ed4c14a0306234029a82a5eb1d5d22da4a21159b Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 11:22:20 -0500 Subject: [PATCH 66/82] Add initial filter implementation for cql --- lib/elasticsearch/cql_query_builder.js | 37 +++++++++++++++----------- test/cql_query_builder.test.js | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index cc5c36ef..d3144b46 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -2,21 +2,22 @@ const { parseWithRightCql } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') -function buildEsQuery (cqlQuery, request) { +function buildEsQuery (cqlQuery, request = null) { const filterQuery = buildFilterQuery(request) return { - bool: { - should: [ - buildEsQueryFromTree( - parseWithRightCql(cqlQuery.trim()) - ) - ] - }, - ...filterQuery + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery.trim()) + ) + ] + }, + ...filterQuery } } function buildFilterQuery (request) { + if (!request) return {} const queryJson = ElasticQueryBuilder.forApiRequest(request).query.toJson() if (queryJson.bool && queryJson.bool.filter) { return { filter: queryJson.bool.filter } @@ -220,18 +221,24 @@ function dateQueries ({ fields, relation, terms, term }) { if (!fields.fields.some(field => field.includes('date'))) { return [] } let range switch (relation) { - case "<": + case '<': range = { lt: terms[0] } - case ">": + break + case '>': range = { gt: terms[0] } - case ">=": + break + case '>=': range = { gte: terms[0] } - case "<=": + break + case '<=': range = { lte: terms[0] } - case "encloses": + break + case 
'encloses': range = { gt: terms[0], lt: terms[1] } - case "within": + break + case 'within': range = { gte: terms[0], lte: terms[1] } + break default: break } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 0dfdc599..ace8a839 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -18,7 +18,7 @@ const { } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { - it('Simple = query', function () { + it.only('Simple = query', function () { expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery From 74f97a4aa5823ebcf03341c3fd8a994c43b4be49 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 11:46:27 -0500 Subject: [PATCH 67/82] Fix tests --- lib/elasticsearch/cql_query_builder.js | 2 +- test/cql_query_builder.test.js | 2 +- test/fixtures/cql_fixtures.js | 2231 ++++++++++++------------ 3 files changed, 1144 insertions(+), 1091 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index d3144b46..494eab8f 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -218,7 +218,7 @@ function adjEqQueries ({ fields, relation, terms, term }) { } function dateQueries ({ fields, relation, terms, term }) { - if (!fields.fields.some(field => field.includes('date'))) { return [] } + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return [] } let range switch (relation) { case '<': diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index ace8a839..0dfdc599 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -18,7 +18,7 @@ const { } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { - it.only('Simple = query', function () { + it('Simple = query', function () { expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery diff --git 
a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 3c702c1f..ec2a7cd7 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,324 +1,274 @@ const simpleAdjQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const prefixPhraseQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 
'The Tragedy of Hamlet, Prince of Denmark', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const simpleAnyQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 
'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'or' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + } + ] + } + } } } - } + + ] } - ] - } + } + ] } } const anyWithPrefixQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 
'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'or' } @@ -326,1109 +276,1212 @@ const anyWithPrefixQuery = { { multi_match: { query: 'Tragedy', - fields: [], + fields: [ + 
'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } }, { multi_match: { query: 'Comedy', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + }, + { + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' + } + }, + { + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + }, + { + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' + } + }, + { + multi_match: { + query: 'Comedy', + fields: [], + type: 
'phrase_prefix' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const simpleAllQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'and' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'and' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForBarcode = { - query: { - bool: { - should: [ - { - 
bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: '123456', fields: [ - 'items.idBarcode' + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { 
+ bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [ + 'items.idBarcode' + ], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForShelfMark = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'B 12', fields: [ - 'items.shelfMark' + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 
'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [ + 'items.shelfMark' + ], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForGeneralTerm = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet', - fields: [], + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 
'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const identifierQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - term: { - uri: 'b1234' - } - }, - { - term: { - 'idIsbn.clean': 'b1234' - } - }, - { - term: { - 'idIssn.clean': 'b1234' - } - }, - { - prefix: { - 'identifierV2.value': 'b1234' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { term: { - 'items.idBarcode': 'b1234' + uri: 'b1234' + } + }, + { + term: { + 'idIsbn.clean': 'b1234' + } + }, + { + term: { + 'idIssn.clean': 'b1234' } }, { prefix: { - 'items.shelfMark.keywordLowercased': 'b1234' + 'identifierV2.value': 'b1234' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [] + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + 
term: { + 'items.idBarcode': 'b1234' + } + }, + { + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [] + } + } } } - } + ] } - ] - } + } + ] } } const binaryBooleanQuery = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 
'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } const ternaryBooleanQuery = { - query: { - bool: { - should: [ - { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 
'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } - } - } - } - ] - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [ - 'genreForm.raw' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ + }, { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } } } ] } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'tragedy', - fields: [], + fields: [ + 'genreForm.raw' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } const queryWithParentheses = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' 
- ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } } - } - ] + ] + } } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } - ] - } + } + ] } } - }, - { - nested: { - path: 'holdings', - query: { + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [ - 'genreForm.raw' - ], - type: 'phrase' + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - 
multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'tragedy', - fields: [], + fields: [ + 'genreForm.raw' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } - ] - } + } + ] } } const negationQuery = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - } - ], - must_not: [ - { - bool: { 
- should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + } + ], + must_not: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } From d5cee7fae647fe6e862371766ede7666098d5c9c Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 12:15:09 -0500 Subject: [PATCH 68/82] Add date and filter features to cql --- lib/elasticsearch/cql_query_builder.js | 6 +- test/cql_query_builder.test.js | 60 +++++- test/fixtures/cql_fixtures.js | 277 ++++++++++++++++++++++++- 3 files changed, 338 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 494eab8f..fe16e03e 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -10,9 +10,9 @@ function buildEsQuery (cqlQuery, request = null) { buildEsQueryFromTree( parseWithRightCql(cqlQuery.trim()) ) - ] - }, - ...filterQuery + ], + ...filterQuery + } } } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js 
index 0dfdc599..3139eda9 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -1,6 +1,7 @@ const { expect } = require('chai') const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const ApiRequest = require('../lib/api-request') const { simpleAdjQuery, simpleAnyQuery, @@ -14,7 +15,14 @@ const { binaryBooleanQuery, ternaryBooleanQuery, queryWithParentheses, - negationQuery + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -122,4 +130,54 @@ describe('CQL Query Builder', function () { negationQuery ) }) + + it('Date after query', function () { + expect(buildEsQuery('date > "1990"')) + .to.deep.equal( + dateAfterQuery + ) + }) + + it('Date after or on query', function () { + expect(buildEsQuery('date >= "1990"')) + .to.deep.equal( + dateAfterOrOnQuery + ) + }) + + it('Date before query', function () { + expect(buildEsQuery('date < "1990"')) + .to.deep.equal( + dateBeforeQuery + ) + }) + + it('Date dateBeforeOrOnQuery query', function () { + expect(buildEsQuery('date <= "1990"')) + .to.deep.equal( + dateBeforeOrOnQuery + ) + }) + + it('Date within query', function () { + expect(buildEsQuery('date within "1990 2000"')) + .to.deep.equal( + dateWithinQuery + ) + }) + + it('Date encloses query', function () { + expect(buildEsQuery('date encloses "1990 2000"')) + .to.deep.equal( + dateEnclosesQuery + ) + }) + + it('Query with applied filters', function () { + const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 'cql' }) + expect(buildEsQuery('author="Shakespeare"', apiRequest)) + .to.deep.equal( + filterQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index ec2a7cd7..9c59d2d2 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ 
-1485,6 +1485,274 @@ const negationQuery = { } } +const dateAfterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateBeforeQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateBeforeOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateAfterOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateWithinQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } + } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateEnclosesQuery = { + bool: { + 
should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } + } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const filterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + } + ] + } + } + ], + filter: [ + { + bool: { + should: [ + { term: { 'language.id': 'Klingon' } }, + { term: { 'language.label': 'Klingon' } } + ] + } + } + ] + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1498,5 +1766,12 @@ module.exports = { binaryBooleanQuery, ternaryBooleanQuery, queryWithParentheses, - negationQuery + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery } From e2e43d00f09c5f180a898083a86a46cfd2d41b51 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 13:32:43 -0500 Subject: [PATCH 69/82] Add some more useful display of parsing and errors --- lib/elasticsearch/cql_grammar.js | 36 +++++++++++++++++++++++++++++++- lib/resources.js | 20 +++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 
9d7700bb..011dfa97 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -90,4 +90,38 @@ function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql } +function display (ast) { + if (!ast.type.includes('query')) { + return ast.text + } + + const childTypes = [ + 'atomic_query', 'sub_query', 'query', 'connective', + 'scope', 'relation', 'quoted_term' + ] + + const children = ast.children + .filter(child => childTypes.includes(child.type)) + .map(child => display(child)) + + if (children.length === 1) { + return children[0] + } + + return children +} + +function displayParsed (string) { + const parsed = rightCqlParser.getAST(reverseString(string)) + if (!parsed) return {} + if (parsed.errors.length) { + return { + error: parsed.errors.map(error => + `Parsing error likely near end of "${reverseString(error.token.rest)}"` + ).join("\n") + } + } + return { parsed: display(reverseAST(parsed)) } +} + +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/resources.js b/lib/resources.js index 5ea637e7..83dd2421 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -20,6 +20,7 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') +const { displayParsed } = require('./elasticsearch/cql_grammar') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -644,6 +645,22 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) + let parsed = {} + + if (params.search_scope === 'cql') { + try { + parsed = displayParsed(params.q) //? 
+ } catch (e) { + throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') + } + if (parsed.error) { + throw new IndexSearchError(parsed.error) + } + if (!parsed.parsed) { + throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') + } + } + let body = buildElasticBody(params) // Strip unnecessary _source fields @@ -682,7 +699,8 @@ module.exports = function (app, _private = null) { resp.debug = { relevanceReport, - query: body + query: body, + ...parsed } return resp }) From 93c042e734c4d35c374e3e1dd89604c6b3ecae58 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 15:52:16 -0500 Subject: [PATCH 70/82] Fix linting --- lib/elasticsearch/cql_grammar.js | 2 +- lib/resources.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 011dfa97..9f9d7b14 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -118,7 +118,7 @@ function displayParsed (string) { return { error: parsed.errors.map(error => `Parsing error likely near end of "${reverseString(error.token.rest)}"` - ).join("\n") + ).join('\n') } } return { parsed: display(reverseAST(parsed)) } diff --git a/lib/resources.js b/lib/resources.js index 83dd2421..3d7ccf7f 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -649,7 +649,7 @@ module.exports = function (app, _private = null) { if (params.search_scope === 'cql') { try { - parsed = displayParsed(params.q) //? + parsed = displayParsed(params.q) // ? } catch (e) { throw new IndexSearchError('Unknown parsing error. 
Error most likely near end of string') } From 0ebe3bd4b465aae525459334d3fe11b923fb657a Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 2 Mar 2026 16:19:56 -0500 Subject: [PATCH 71/82] Add new strategy for handling keyword vs text fields --- lib/elasticsearch/cql/index-mapping.js | 74 +- lib/elasticsearch/cql/mapping-from-es.json | 1120 +++++++++++++++++ lib/elasticsearch/cql_query_builder.js | 116 +- test/cql_query_builder.test.js | 10 +- test/fixtures/cql_fixtures.js | 1266 +++++++------------- 5 files changed, 1686 insertions(+), 900 deletions(-) create mode 100644 lib/elasticsearch/cql/mapping-from-es.json diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index a73f1e55..0befbc87 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -30,9 +30,39 @@ const indexMapping = { 'formerTitle', 'addedAuthorTitle', 'placeOfPublication.folded', - { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) }, // Try to detect shelfmark searches (e.g. 
JFD 16-5143) { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + // missing description + 'subjectLiteral.raw', + 'creatorLiteral.keywordLowercased', + 'contributorLiteral.keywordLowercased', + // note.label is missing + 'publisherLiteral.raw', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'genreForm.raw', + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + // parallelPublisher/parallelPublisherLiteral missing + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication', + { field: 'items.shelfMark.raw', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + term: [ + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) } ] }, title: { @@ -53,30 +83,54 @@ const indexMapping = { 'parallelUniformTitle', 'formerTitle', 'addedAuthorTitle' + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication' ] }, author: { - fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'], + exact_fields: [ + 'creatorLiteral.keywordLowercased', 'contributorLiteral.keywordLowercased', + 
'parallelCreatorLiteral.raw', 'parallelContributorLiteral.raw' + ] }, callnumber: { - fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + term: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] }, identifier: { prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] }, subject: { - fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'], + exact_fields: ['subjectLiteral.raw'] }, - language: { fields: ['language.id', 'language.label'] }, + language: { term: ['language.id', 'language.label'] }, date: { fields: ['dates.range'] }, series: { - fields: ['series', 'parallelSeries'] + term: ['series', 'parallelSeries'] }, - genre: { fields: ['genreForm.raw'] }, - center: { fields: ['buildingLocationIds'] }, - division: { fields: ['collectionIds'] }, - format: { fields: ['formatId'] } + genre: { fields: ['genreForm'], exact_fields: ['genreForm.raw'] }, + center: { term: ['buildingLocationIds'] }, + division: { term: ['collectionIds'] }, + format: { term: ['formatId'] } } module.exports = { diff --git a/lib/elasticsearch/cql/mapping-from-es.json b/lib/elasticsearch/cql/mapping-from-es.json new file mode 100644 index 00000000..614a62e3 --- /dev/null +++ b/lib/elasticsearch/cql/mapping-from-es.json @@ -0,0 +1,1120 @@ +{ + "resources-2025-07-07": { + "mappings": { + "dynamic": "strict", + "properties": { + "addedAuthorTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "buildingLocationIds": { + "type": "keyword", + "eager_global_ordinals": true + }, + "carrierType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "carrierType_packed": { + "type": "keyword", + "eager_global_ordinals": 
true + }, + "collectionIds": { + "type": "keyword", + "eager_global_ordinals": true + }, + "contentsTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "contributions": { + "type": "keyword" + }, + "contributorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "contributorLiteralNormalized": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "contributorLiteralWithoutDates": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "contributorNameRoleTest": { + "type": "keyword" + }, + "contributor_sort": { + "type": "keyword" + }, + "contributors": { + "properties": { + "label": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "nameRole": { + "type": "keyword" + } + } + }, + "contributorsPacked": { + "type": "keyword" + }, + "contributorsTest": { + "properties": { + "label": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "nameRole": { + "type": "keyword" + }, + "title": { + "type": "keyword" + } + } + }, + "created": { + "type": "date", + "index": false + }, + "createdDecade": { + "type": "short" + }, + "createdString": { + "type": "keyword" + }, + "createdYear": { + "type": "short" + }, + "creatorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "creatorLiteralNormalized": { + "type": "keyword", + "fields": { + "keywordLowercased": { + 
"type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "creatorLiteralWithoutDates": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "creator_sort": { + "type": "keyword" + }, + "dateEndDecade": { + "type": "short" + }, + "dateEndString": { + "type": "keyword" + }, + "dateEndYear": { + "type": "short" + }, + "dateStartDecade": { + "type": "short" + }, + "dateStartString": { + "type": "keyword" + }, + "dateStartYear": { + "type": "short" + }, + "dateString": { + "type": "keyword" + }, + "dates": { + "type": "nested", + "properties": { + "range": { + "type": "date_range" + }, + "raw": { + "type": "keyword" + }, + "tag": { + "type": "keyword" + } + } + }, + "depiction": { + "type": "keyword" + }, + "description": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "dimensions": { + "type": "keyword" + }, + "donor": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "editionStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "electronicResources": { + "properties": { + "label": { + "type": "keyword" + }, + "url": { + "type": "keyword" + } + } + }, + "extent": { + "type": "keyword" + }, + "formatId": { + "type": "keyword" + }, + "formerTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "genreForm": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "holdings": { + "type": "nested", + "properties": { + "checkInBoxes": { + "type": "nested", + "properties": { + "copies": { + "type": 
"short" + }, + "coverage": { + "type": "keyword" + }, + "position": { + "type": "short" + }, + "shelfMark": { + "type": "keyword", + "index": false + }, + "status": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } + }, + "format": { + "type": "keyword" + }, + "holdingStatement": { + "type": "keyword" + }, + "identifier": { + "properties": { + "identifierStatus": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "location": { + "properties": { + "code": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "notes": { + "type": "keyword" + }, + "physicalLocation": { + "type": "keyword", + "index": false + }, + "shelfMark": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + } + } + }, + "idIsbn": { + "type": "keyword", + "fields": { + "clean": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "identifier_normalizer" + } + } + }, + "idIsbn_clean": { + "type": "keyword" + }, + "idIssn": { + "type": "keyword", + "fields": { + "clean": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "identifier_normalizer" + } + } + }, + "idLcc": { + "type": "keyword" + }, + "idLccSort": { + "type": "keyword" + }, + "idLccn": { + "type": "keyword", + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "idOclc": { + "type": "keyword" + }, + "idOwi": { + "type": "keyword" + }, + "identifier": { + "type": "keyword" + }, + "identifierV2": { + "properties": { + "identifierStatus": { + "type": "keyword", + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "issuance": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "issuance_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "items": { + "type": "nested", + "properties": { + "accessMessage": { + "properties": { + "id": { + "type": 
"keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "accessMessage_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "aeonUrl": { + "type": "keyword" + }, + "catalogItemType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "catalogItemType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "dateRange": { + "type": "date_range", + "format": "yyyy-MM-dd||yyyy-MM||yyyy" + }, + "dateRaw": { + "type": "text" + }, + "deliveryLocation": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "deliveryLocation_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "dueDate": { + "type": "date" + }, + "electronicLocator": { + "properties": { + "label": { + "type": "keyword", + "index": false + }, + "url": { + "type": "keyword" + } + } + }, + "enumerationChronology": { + "type": "keyword" + }, + "enumerationChronology_sort": { + "type": "keyword" + }, + "formatLiteral": { + "type": "keyword" + }, + "holdingLocation": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "holdingLocation_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "idBarcode": { + "type": "keyword" + }, + "identifier": { + "type": "keyword" + }, + "identifierV2": { + "properties": { + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "location": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "m2CustomerCode": { + "type": "keyword" + }, + "owner": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "owner_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "physicalLocation": { + "type": "keyword" + }, + "recapCustomerCode": { + "type": "keyword" + }, + "requestable": { + "type": "boolean" + }, + "shelfMark": { 
+ "type": "text", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "shelfmark_normalizer" + }, + "raw": { + "type": "keyword" + } + } + }, + "shelfMark_sort": { + "type": "keyword" + }, + "status": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "status_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "type": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + }, + "volumeRange": { + "type": "integer_range" + }, + "volumeRaw": { + "type": "text" + } + } + }, + "language": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "language_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "lccClassification": { + "type": "keyword" + }, + "materialType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "materialType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "mediaType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "mediaType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "note": { + "properties": { + "label": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "noteType": { + "type": "keyword" + }, + "type": { + "type": "keyword", + "index": false + } + } + }, + "numAvailable": { + "type": "short" + }, + "numCheckinCardItems": { + "type": "short" + }, + "numElectronicResources": { + "type": "short" + }, + "numItemDatesParsed": { + "type": "short" + }, + "numItemVolumesParsed": { + "type": "short" + }, + "numItems": { + "type": "short" + }, + "numItemsTotal": { + "type": "short" + }, + "nyplSource": { + "type": "keyword" + }, + "parallelAddedAuthorTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + 
}, + "raw": { + "type": "keyword" + } + } + }, + "parallelContributorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "parallelCreatorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "parallelDescription": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "parallelDisplayField": { + "properties": { + "fieldName": { + "type": "keyword" + }, + "index": { + "type": "short" + }, + "value": { + "type": "text" + } + } + }, + "parallelEditionStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "parallelNote": { + "properties": { + "label": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "noteType": { + "type": "keyword" + }, + "type": { + "type": "keyword", + "index": false + } + } + }, + "parallelPlaceOfPublication": { + "type": "keyword", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelPublicationStatement": { + "type": "keyword", + "index": false + }, + "parallelPublisher": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelPublisherLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelSeries": { + "type": "keyword" + }, + "parallelSeriesStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } 
+ } + }, + "parallelSubjectLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelSummary": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "parallelTableOfContents": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelTitleAlt": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "parallelTitleDisplay": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelUniformTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "partOf": { + "type": "keyword" + }, + "physicalDescription": { + "type": "keyword", + "index": false + }, + "placeOfPublication": { + "type": "keyword", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "popularity": { + "type": "short" + }, + "publicDomain": { + "type": "boolean" + }, + "publicationStatement": { + "type": "keyword", + "index": false + }, + "publisherLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "recordType": { + "type": "keyword" + }, + "recordTypeId": { + "type": "keyword" + }, + "serialPublicationDates": { + "type": "keyword", + "index": false + }, + "series": { + "type": "keyword" + }, + "seriesStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + 
"type": "keyword", + "eager_global_ordinals": true + } + } + }, + "shelfMark": { + "type": "text", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "shelfmark_normalizer" + }, + "raw": { + "type": "keyword" + } + } + }, + "subjectLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "subjectLiteral_exploded": { + "type": "keyword" + }, + "summary": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "supplementaryContent": { + "properties": { + "label": { + "type": "keyword", + "index": false + }, + "url": { + "type": "keyword" + } + } + }, + "suppressed": { + "type": "boolean" + }, + "tableOfContents": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "title": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + }, + "keywordLowercased": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "lowercase_normalizer" + }, + "keywordLowercasedStripped": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "shingle": { + "type": "text", + "analyzer": "shingles_analyzer" + } + } + }, + "titleAlt": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "titleDisplay": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "title_sort": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "uniformTitle": { + "type": "text", + "fields": { + "folded": { 
+ "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "updatedAt": { + "type": "date" + }, + "uri": { + "type": "keyword" + }, + "uris": { + "type": "keyword" + } + } + } + } +} diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fe16e03e..8555738f 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -185,40 +185,64 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { + console.log('building atomic main ', relation, terms, term) + return anyAllQueries({ fields, relation, terms }) || + adjEqQueries({ fields, relation, terms, term }) || + dateQueries({ fields, relation, terms, term }) +} + +function anyAllQueries ({ fields, relation, terms }) { + if (!['any', 'all'].includes(relation)) { return null } + const operator = (relation === 'any' ? 
'should' : 'must') return { bool: { - should: [ - ...anyAllQueries({ fields, relation, terms, term }), - ...adjEqQueries({ fields, relation, terms, term }), - ...termQueriesForTermFields({ fields, relation, terms, term }), - ...prefixQueriesForPrefixFields({ fields, relation, terms, term }), - ...dateQueries({ fields, relation, terms, term }) - ] + [operator]: terms.map(term => matchTermWithFields(fields, term, 'cross_fields')) } } } -function anyAllQueries ({ fields, relation, terms, term }) { - if (!['any', 'all'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) - ].filter(q => q) +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', '==', 'adj'].includes(relation)) { return null } + const type = (relation === '==') ? 'exact' : 'phrase' + return matchTermWithFields(fields, term, type) } -function adjEqQueries ({ fields, relation, terms, term }) { - if (!['=', 'adj'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - term.startsWith('^') - ? phrasePrefixQuery(fieldsToUse, term.slice(1)) - : phraseQuery(fieldsToUse, term) - ].filter(q => q) +const table = { + exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, + prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, + basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } +} + +const selectFields = (queryType, fields) => (selector) => { + return Object.entries(fields) + .filter(([fieldType, fieldNames]) => { + return table[queryType][fieldType] === selector + }) + .map(([fieldType, fieldNames]) => fieldNames) + .flat() +} + +function matchTermWithFields (fields, term, type) { + const queryType = term[0] === '^' ? 'prefix' : (type === 'exact' ? 
'exact' : 'basic') + if (term[0] === '^') term = term.slice(1) + + const selector = selectFields(queryType, fields) + + const queries = [ + ...multiMatch(selector('multi_match'), term, type), + ...(selector('term').map(termField => termQuery(termField, term))), + ...(selector('prefix').map(prefixField => prefixQuery(prefixField, term))) + ] + + return { + bool: { + should: queries + } + } } function dateQueries ({ fields, relation, terms, term }) { - if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return [] } + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return null } let range switch (relation) { case '<': @@ -257,16 +281,6 @@ function dateQueries ({ fields, relation, terms, term }) { ] } -function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { - if (!fields.prefix) return [] - return fields.prefix.map(field => prefixQuery(field, term)) -} - -function termQueriesForTermFields ({ fields, relation, terms, term }) { - if (!fields.term) return [] - return fields.term.map(field => termQuery(field, term)) -} - function termQuery (field, term) { return { term: { [field]: term } } } @@ -275,38 +289,16 @@ function prefixQuery (field, term) { return { prefix: { [field]: term } } } -function multiMatch (fields, relation, terms) { - if (!fields) return - return { - multi_match: { - query: terms.join(' '), - fields, - type: 'cross_fields', - operator: relation === 'any' ? 
'or' : 'and' - } - } -} - -function phrasePrefixQuery (fields, term) { - if (!fields) return - return { - multi_match: { - query: term, - fields, - type: 'phrase_prefix' - } - } -} +function multiMatch (fields, term, type) { + if (!fields || !fields.length) return [] -function phraseQuery (fields, term) { - if (!fields) return - return { + return [{ multi_match: { query: term, fields, - type: 'phrase' + type } - } + }] } module.exports = { @@ -315,5 +307,7 @@ module.exports = { buildBoolean, buildAtomic, buildAtomicMain, - nestedMapAndFilter + nestedMapAndFilter, + selectFields, + indexMapping } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 3139eda9..cd55ec4a 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -22,7 +22,8 @@ const { dateAfterOrOnQuery, dateWithinQuery, dateEnclosesQuery, - filterQuery + filterQuery, + multiAdjQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -40,6 +41,13 @@ describe('CQL Query Builder', function () { ) }) + it('Multi-word adj query', function () { + expect(buildEsQuery('title adj "Hamlet, Prince"')) + .to.deep.equal( + multiAdjQuery + ) + }) + it('Simple any query', function () { expect(buildEsQuery('title any "Hamlet Othello"')) .to.deep.equal( diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 9c59d2d2..0ca5bae9 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -35,40 +35,10 @@ const simpleAdjQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ 
-77,7 +47,7 @@ const simpleAdjQuery = { } } -const prefixPhraseQuery = { +const multiAdjQuery = { bool: { should: [ { @@ -88,7 +58,7 @@ const prefixPhraseQuery = { should: [ { multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', + query: 'Hamlet, Prince', fields: [ 'title', 'title.folded', @@ -107,47 +77,97 @@ const prefixPhraseQuery = { 'formerTitle', 'addedAuthorTitle' ], - type: 'phrase_prefix' + type: 'phrase' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } - } - ] + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const prefixPhraseQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'seriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'titleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'donor.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelTitleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelSeriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelCreatorLiteral.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'uniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelUniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'addedAuthorTitle.raw': 'The Tragedy 
of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + placeOfPublication: 'The Tragedy of Hamlet, Prince of Denmark' + } } - } + ] } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -166,28 +186,63 @@ const simpleAnyQuery = { bool: { should: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -199,14 +254,8 @@ const simpleAnyQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet 
Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - } + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -218,20 +267,13 @@ const simpleAnyQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - } + { bool: { should: [] } }, + { bool: { should: [] } } ] } } } } - ] } } @@ -249,76 +291,127 @@ const anyWithPrefixQuery = { bool: { should: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Tragedy' + } + }, + { + prefix: { 'seriesStatement.raw': 'Tragedy' } + }, + { prefix: { 'titleAlt.raw': 'Tragedy' } }, + { prefix: { 'donor.raw': 'Tragedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Tragedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Tragedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Tragedy' } + }, + { prefix: { 'uniformTitle.raw': 'Tragedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Tragedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Tragedy' } + }, + { prefix: { placeOfPublication: 'Tragedy' } } + ] } }, { - multi_match: { - query: 'Tragedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 
'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Comedy' + } + }, + { prefix: { 'seriesStatement.raw': 'Comedy' } }, + { prefix: { 'titleAlt.raw': 'Comedy' } }, + { prefix: { 'donor.raw': 'Comedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Comedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Comedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Comedy' } + }, + { prefix: { 'uniformTitle.raw': 'Comedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Comedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Comedy' } + }, + { prefix: { placeOfPublication: 'Comedy' } } + ] } }, { - multi_match: { - query: 'Comedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 
'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -330,28 +423,10 @@ const anyWithPrefixQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } - } + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -363,28 +438,10 @@ const anyWithPrefixQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } - } + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -405,30 +462,65 @@ const simpleAllQuery = { should: [ { bool: { - should: [ + must: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'and' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 
'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -439,15 +531,9 @@ const simpleAllQuery = { path: 'items', query: { bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } - } + must: [ + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -458,15 +544,9 @@ const simpleAllQuery = { path: 'holdings', query: { bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } - } + must: [ + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -533,38 +613,13 @@ const keywordQueryForBarcode = { path: 'items', query: { bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [ - 'items.idBarcode' - ], - type: 'phrase' - } - } - ] + should: [{ term: { 'items.idBarcode': '123456' } }] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -631,9 +686,7 @@ const keywordQueryForShelfMark = { { multi_match: { query: 'B 12', - fields: [ - 
'items.shelfMark' - ], + fields: ['items.shelfMark'], type: 'phrase' } } @@ -643,22 +696,7 @@ const keywordQueryForShelfMark = { } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -713,44 +751,14 @@ const keywordQueryForGeneralTerm = { type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } + ] } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -768,26 +776,10 @@ const identifierQuery = { { bool: { should: [ - { - term: { - uri: 'b1234' - } - }, - { - term: { - 'idIsbn.clean': 'b1234' - } - }, - { - term: { - 'idIssn.clean': 'b1234' - } - }, - { - prefix: { - 'identifierV2.value': 'b1234' - } - } + { term: { uri: 'b1234' } }, + { term: { 'idIsbn.clean': 'b1234' } }, + { term: { 'idIssn.clean': 'b1234' } }, + { prefix: { 'identifierV2.value': 'b1234' } } ] } }, @@ -797,11 +789,7 @@ const identifierQuery = { query: { bool: { should: [ - { - term: { - 'items.idBarcode': 'b1234' - } - }, + { term: { 'items.idBarcode': 'b1234' } }, { prefix: { 'items.shelfMark.keywordLowercased': 'b1234' @@ -813,14 +801,7 @@ const identifierQuery = { } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -858,39 +839,12 @@ const binaryBooleanQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } 
+ nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -902,53 +856,18 @@ const binaryBooleanQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -995,37 +914,13 @@ const ternaryBooleanQuery = { { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1037,53 +932,21 @@ const ternaryBooleanQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - 
fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1101,9 +964,7 @@ const ternaryBooleanQuery = { { multi_match: { query: 'tragedy', - fields: [ - 'genreForm.raw' - ], + fields: ['genreForm'], type: 'phrase' } } @@ -1111,39 +972,12 @@ const ternaryBooleanQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1185,39 +1019,12 @@ const queryWithParentheses = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1232,53 +1039,21 @@ const queryWithParentheses = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1293,9 +1068,7 @@ const queryWithParentheses = { { multi_match: { query: 'tragedy', - fields: [ - 
'genreForm.raw' - ], + fields: ['genreForm'], type: 'phrase' } } @@ -1305,37 +1078,13 @@ const queryWithParentheses = { { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1380,39 +1129,12 @@ const negationQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1426,53 +1148,18 @@ const negationQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1491,24 +1178,16 @@ const dateAfterQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gt: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } } - }, - { - nested: { 
path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1522,24 +1201,16 @@ const dateBeforeQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lt: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1553,24 +1224,16 @@ const dateBeforeOrOnQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lte: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1584,24 +1247,16 @@ const dateAfterOrOnQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gte: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1615,26 +1270,18 @@ const dateWithinQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 
'dates', - query: { - range: { 'dates.range': { gte: '1990', lte: '2000' } } - } - } + [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } } - ] + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1648,26 +1295,18 @@ const dateEnclosesQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gt: '1990', lt: '2000' } } - } - } + [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } } - ] + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1701,40 +1340,10 @@ const filterQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -1773,5 +1382,6 @@ module.exports = { dateAfterOrOnQuery, dateWithinQuery, dateEnclosesQuery, - filterQuery + filterQuery, + multiAdjQuery } From b2ebcabaf713206792c6d570a3166ada07d4e739 Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 2 Mar 2026 16:25:19 -0500 Subject: [PATCH 72/82] Add exact match query --- lib/elasticsearch/cql_grammar.js | 2 +- lib/elasticsearch/cql_query_builder.js | 1 - test/cql_query_builder.test.js | 10 +++++- 
test/fixtures/cql_fixtures.js | 48 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9f9d7b14..65cda1fc 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -22,7 +22,7 @@ const leftCql = ` scope ::= scope_term whitespace | scope_term relation ::= relation_term whitespace | relation_term scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses" quoted_term ::= quote phrase quote phrase ::= phrase whitespace word | word whitespace ::= [#x20#x09#x0A#x0D]+ diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 8555738f..fa28f9e1 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -185,7 +185,6 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { - console.log('building atomic main ', relation, terms, term) return anyAllQueries({ fields, relation, terms }) || adjEqQueries({ fields, relation, terms, term }) || dateQueries({ fields, relation, terms, term }) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index cd55ec4a..cf856b5c 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -23,7 +23,8 @@ const { dateWithinQuery, dateEnclosesQuery, filterQuery, - multiAdjQuery + multiAdjQuery, + exactMatchQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -188,4 +189,11 @@ describe('CQL Query 
Builder', function () { filterQuery ) }) + + it('Exact match query', function () { + expect(buildEsQuery('author == "William Shakespeare"')) + .to.deep.equal( + exactMatchQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 0ca5bae9..555e0a45 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1362,6 +1362,51 @@ const filterQuery = { } } +const exactMatchQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + term: { + 'creatorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'contributorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'parallelCreatorLiteral.raw': 'William Shakespeare' + } + }, + { + term: { + 'parallelContributorLiteral.raw': 'William Shakespeare' + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1383,5 +1428,6 @@ module.exports = { dateWithinQuery, dateEnclosesQuery, filterQuery, - multiAdjQuery + multiAdjQuery, + exactMatchQuery } From 691b974fd9ee12a5c98ca15998db9b143587ad3c Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 10 Mar 2026 14:55:07 -0400 Subject: [PATCH 73/82] Fix deploy yaml --- .github/workflows/test-and-deploy.yml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index db2624d0..28b57be9 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -20,19 +20,10 @@ jobs: id-token: write contents: read runs-on: ubuntu-latest - needs: tests -<<<<<<< HEAD - if: github.ref == 'refs/heads/qa2' - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: 
arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 -======= + needs: + - tests if: github.ref == 'refs/heads/qa' steps: ->>>>>>> main - uses: actions/checkout@v4 - name: Set Node version uses: actions/setup-node@v4 From 36e630203776e16104c113d44da10e4cbad21420 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 13 Mar 2026 15:10:51 -0400 Subject: [PATCH 74/82] Fixes in response to PR comments --- lib/elasticsearch/config.js | 2 +- lib/elasticsearch/cql/mapping-from-es.json | 1120 -------------------- lib/elasticsearch/cql_grammar.js | 13 +- lib/elasticsearch/cql_query_builder.js | 52 +- lib/resources.js | 1 - 5 files changed, 49 insertions(+), 1139 deletions(-) delete mode 100644 lib/elasticsearch/cql/mapping-from-es.json diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index 494dbb15..806ed729 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -74,7 +74,7 @@ const SEARCH_SCOPES = { standard_number: { // We do custom field matching for this search-scope }, - cql: {} + cql: {} // see cql/index_mapping for this search scope } const FILTER_CONFIG = { diff --git a/lib/elasticsearch/cql/mapping-from-es.json b/lib/elasticsearch/cql/mapping-from-es.json deleted file mode 100644 index 614a62e3..00000000 --- a/lib/elasticsearch/cql/mapping-from-es.json +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "resources-2025-07-07": { - "mappings": { - "dynamic": "strict", - "properties": { - "addedAuthorTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "buildingLocationIds": { - "type": "keyword", - "eager_global_ordinals": true - }, - "carrierType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "carrierType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "collectionIds": { - "type": "keyword", - "eager_global_ordinals": true - 
}, - "contentsTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "contributions": { - "type": "keyword" - }, - "contributorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "contributorLiteralNormalized": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "contributorLiteralWithoutDates": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "contributorNameRoleTest": { - "type": "keyword" - }, - "contributor_sort": { - "type": "keyword" - }, - "contributors": { - "properties": { - "label": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "nameRole": { - "type": "keyword" - } - } - }, - "contributorsPacked": { - "type": "keyword" - }, - "contributorsTest": { - "properties": { - "label": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "nameRole": { - "type": "keyword" - }, - "title": { - "type": "keyword" - } - } - }, - "created": { - "type": "date", - "index": false - }, - "createdDecade": { - "type": "short" - }, - "createdString": { - "type": "keyword" - }, - "createdYear": { - "type": "short" - }, - "creatorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "creatorLiteralNormalized": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - 
"creatorLiteralWithoutDates": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "creator_sort": { - "type": "keyword" - }, - "dateEndDecade": { - "type": "short" - }, - "dateEndString": { - "type": "keyword" - }, - "dateEndYear": { - "type": "short" - }, - "dateStartDecade": { - "type": "short" - }, - "dateStartString": { - "type": "keyword" - }, - "dateStartYear": { - "type": "short" - }, - "dateString": { - "type": "keyword" - }, - "dates": { - "type": "nested", - "properties": { - "range": { - "type": "date_range" - }, - "raw": { - "type": "keyword" - }, - "tag": { - "type": "keyword" - } - } - }, - "depiction": { - "type": "keyword" - }, - "description": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "dimensions": { - "type": "keyword" - }, - "donor": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "editionStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "electronicResources": { - "properties": { - "label": { - "type": "keyword" - }, - "url": { - "type": "keyword" - } - } - }, - "extent": { - "type": "keyword" - }, - "formatId": { - "type": "keyword" - }, - "formerTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "genreForm": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "holdings": { - "type": "nested", - "properties": { - "checkInBoxes": { - "type": "nested", - "properties": { - "copies": { - "type": "short" - }, - "coverage": { - "type": "keyword" - }, - "position": { - 
"type": "short" - }, - "shelfMark": { - "type": "keyword", - "index": false - }, - "status": { - "type": "keyword" - }, - "type": { - "type": "keyword" - } - } - }, - "format": { - "type": "keyword" - }, - "holdingStatement": { - "type": "keyword" - }, - "identifier": { - "properties": { - "identifierStatus": { - "type": "keyword" - }, - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "location": { - "properties": { - "code": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "notes": { - "type": "keyword" - }, - "physicalLocation": { - "type": "keyword", - "index": false - }, - "shelfMark": { - "type": "keyword" - }, - "uri": { - "type": "keyword" - } - } - }, - "idIsbn": { - "type": "keyword", - "fields": { - "clean": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "identifier_normalizer" - } - } - }, - "idIsbn_clean": { - "type": "keyword" - }, - "idIssn": { - "type": "keyword", - "fields": { - "clean": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "identifier_normalizer" - } - } - }, - "idLcc": { - "type": "keyword" - }, - "idLccSort": { - "type": "keyword" - }, - "idLccn": { - "type": "keyword", - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "idOclc": { - "type": "keyword" - }, - "idOwi": { - "type": "keyword" - }, - "identifier": { - "type": "keyword" - }, - "identifierV2": { - "properties": { - "identifierStatus": { - "type": "keyword", - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "issuance": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "issuance_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "items": { - "type": "nested", - "properties": { - "accessMessage": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - 
"accessMessage_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "aeonUrl": { - "type": "keyword" - }, - "catalogItemType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "catalogItemType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "dateRange": { - "type": "date_range", - "format": "yyyy-MM-dd||yyyy-MM||yyyy" - }, - "dateRaw": { - "type": "text" - }, - "deliveryLocation": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "deliveryLocation_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "dueDate": { - "type": "date" - }, - "electronicLocator": { - "properties": { - "label": { - "type": "keyword", - "index": false - }, - "url": { - "type": "keyword" - } - } - }, - "enumerationChronology": { - "type": "keyword" - }, - "enumerationChronology_sort": { - "type": "keyword" - }, - "formatLiteral": { - "type": "keyword" - }, - "holdingLocation": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "holdingLocation_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "idBarcode": { - "type": "keyword" - }, - "identifier": { - "type": "keyword" - }, - "identifierV2": { - "properties": { - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "location": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "m2CustomerCode": { - "type": "keyword" - }, - "owner": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "owner_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "physicalLocation": { - "type": "keyword" - }, - "recapCustomerCode": { - "type": "keyword" - }, - "requestable": { - "type": "boolean" - }, - "shelfMark": { - "type": "text", - "fields": { - "keywordLowercased": { - 
"type": "keyword", - "normalizer": "shelfmark_normalizer" - }, - "raw": { - "type": "keyword" - } - } - }, - "shelfMark_sort": { - "type": "keyword" - }, - "status": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "status_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "type": { - "type": "keyword" - }, - "uri": { - "type": "keyword" - }, - "volumeRange": { - "type": "integer_range" - }, - "volumeRaw": { - "type": "text" - } - } - }, - "language": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "language_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "lccClassification": { - "type": "keyword" - }, - "materialType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "materialType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "mediaType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "mediaType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "note": { - "properties": { - "label": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "noteType": { - "type": "keyword" - }, - "type": { - "type": "keyword", - "index": false - } - } - }, - "numAvailable": { - "type": "short" - }, - "numCheckinCardItems": { - "type": "short" - }, - "numElectronicResources": { - "type": "short" - }, - "numItemDatesParsed": { - "type": "short" - }, - "numItemVolumesParsed": { - "type": "short" - }, - "numItems": { - "type": "short" - }, - "numItemsTotal": { - "type": "short" - }, - "nyplSource": { - "type": "keyword" - }, - "parallelAddedAuthorTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - 
"parallelContributorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelCreatorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelDescription": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "parallelDisplayField": { - "properties": { - "fieldName": { - "type": "keyword" - }, - "index": { - "type": "short" - }, - "value": { - "type": "text" - } - } - }, - "parallelEditionStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "parallelNote": { - "properties": { - "label": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "noteType": { - "type": "keyword" - }, - "type": { - "type": "keyword", - "index": false - } - } - }, - "parallelPlaceOfPublication": { - "type": "keyword", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelPublicationStatement": { - "type": "keyword", - "index": false - }, - "parallelPublisher": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelPublisherLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelSeries": { - "type": "keyword" - }, - "parallelSeriesStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelSubjectLiteral": { - "type": 
"text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelSummary": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "parallelTableOfContents": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelTitleAlt": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "parallelTitleDisplay": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelUniformTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "partOf": { - "type": "keyword" - }, - "physicalDescription": { - "type": "keyword", - "index": false - }, - "placeOfPublication": { - "type": "keyword", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "popularity": { - "type": "short" - }, - "publicDomain": { - "type": "boolean" - }, - "publicationStatement": { - "type": "keyword", - "index": false - }, - "publisherLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "recordType": { - "type": "keyword" - }, - "recordTypeId": { - "type": "keyword" - }, - "serialPublicationDates": { - "type": "keyword", - "index": false - }, - "series": { - "type": "keyword" - }, - "seriesStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true 
- } - } - }, - "shelfMark": { - "type": "text", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "shelfmark_normalizer" - }, - "raw": { - "type": "keyword" - } - } - }, - "subjectLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "subjectLiteral_exploded": { - "type": "keyword" - }, - "summary": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "supplementaryContent": { - "properties": { - "label": { - "type": "keyword", - "index": false - }, - "url": { - "type": "keyword" - } - } - }, - "suppressed": { - "type": "boolean" - }, - "tableOfContents": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "title": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - }, - "keyword": { - "type": "keyword", - "ignore_above": 256 - }, - "keywordLowercased": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "lowercase_normalizer" - }, - "keywordLowercasedStripped": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "shingle": { - "type": "text", - "analyzer": "shingles_analyzer" - } - } - }, - "titleAlt": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "titleDisplay": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "title_sort": { - "type": "keyword" - }, - "type": { - "type": "keyword" - }, - "uniformTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" 
- }, - "raw": { - "type": "keyword" - } - } - }, - "updatedAt": { - "type": "date" - }, - "uri": { - "type": "keyword" - }, - "uris": { - "type": "keyword" - } - } - } - } -} diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 65cda1fc..87b8c042 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -75,6 +75,7 @@ function reverseString (string) { } function reverseAST (tree) { + if (!tree) return null tree.text = reverseString(tree.text) tree.children = tree.children.map(child => reverseAST(child)).reverse() return tree @@ -82,15 +83,16 @@ function reverseAST (tree) { const rightCqlParser = new Grammars.W3C.Parser(rightCql) +// we want to associate operators to the left, but we have a right parser. +// so: reverse the grammar and the input string, then reverse the output function parseRight (string, parser) { return reverseAST(parser.getAST(reverseString(string))) } - function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -function display (ast) { +function parsedASTtoNestedArray (ast) { if (!ast.type.includes('query')) { return ast.text } @@ -102,7 +104,7 @@ function display (ast) { const children = ast.children .filter(child => childTypes.includes(child.type)) - .map(child => display(child)) + .map(child => parsedASTtoNestedArray(child)) if (children.length === 1) { return children[0] @@ -111,8 +113,9 @@ function display (ast) { return children } +// we need to reverse the error message since `parseWithRightCql` doesn't function displayParsed (string) { - const parsed = rightCqlParser.getAST(reverseString(string)) + const parsed = parseWithRightCql(string) if (!parsed) return {} if (parsed.errors.length) { return { @@ -121,7 +124,7 @@ function displayParsed (string) { ).join('\n') } } - return { parsed: display(reverseAST(parsed)) } + return { parsed: parsedASTtoNestedArray(reverseAST(parsed)) } } module.exports = { simplify, reverseAST, reverseGrammar, parseRight, 
parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fa28f9e1..c497ebad 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -105,10 +105,10 @@ function findTopWords (tree) { } /** - For an object where the keys are arrays, apply the given filter and map + For an object where the values are arrays, apply the given filter and map to each of the arrays */ -function nestedMapAndFilter (obj, filter, map) { +function nestedFilterAndMap (obj, filter, map) { return Object.assign({}, ...(Object.entries(obj) .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) @@ -131,25 +131,25 @@ function nestedMapAndFilter (obj, filter, map) { */ function buildAtomic ({ scope, relation, terms, term }) { - const allFields = nestedMapAndFilter( + const allFields = nestedFilterAndMap( indexMapping[scope], field => typeof field === 'string' || field.on(term), field => (typeof field === 'string' ? 
field : field.field) ) - const bibFields = nestedMapAndFilter( + const bibFields = nestedFilterAndMap( allFields, (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), field => field ) - const itemFields = nestedMapAndFilter( + const itemFields = nestedFilterAndMap( allFields, (field) => field.startsWith('items'), field => field ) - const holdingsFields = nestedMapAndFilter( + const holdingsFields = nestedFilterAndMap( allFields, (field) => field.startsWith('holdings'), field => field @@ -185,9 +185,24 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { - return anyAllQueries({ fields, relation, terms }) || - adjEqQueries({ fields, relation, terms, term }) || - dateQueries({ fields, relation, terms, term }) + switch (relation) { + case 'any': + case 'all': + return anyAllQueries({ fields, relation, terms }) + case '=': + case '==': + case 'adj': + return adjEqQueries({ fields, relation, terms, term }) + case '>': + case '<': + case '<=': + case '>=': + case 'within': + case 'encloses': + return dateQueries({ fields, relation, terms, term }) + default: + break + } } function anyAllQueries ({ fields, relation, terms }) { @@ -206,16 +221,29 @@ function adjEqQueries ({ fields, relation, terms, term }) { return matchTermWithFields(fields, term, type) } -const table = { +// depending on the type of cql query supplied by the user, +// we may need to modify the es query from the type indicated by the index +// mapping. +// e.g. in case the user indicates a prefix query, all `term` queries should be +// mapped to `prefix` queries +// X represents field types that should be excluded e.g. 
for exact matching, +// exclude regular fields and use matching `exact_fields` instead +const esQueryMappingByCqlQueryType = { exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } } +// used to turn the above table inside out, e.g. +// in case of queryType = `prefix`, +// will gather together, for a given set of fields, all the query types that +// need to be included under `selector` +// so e.g. `term`, `prefix`, and `exact_fields` fields all need to be included +// in the `prefix` matcher, since they are all mapped to `prefix` in this case const selectFields = (queryType, fields) => (selector) => { return Object.entries(fields) .filter(([fieldType, fieldNames]) => { - return table[queryType][fieldType] === selector + return esQueryMappingByCqlQueryType[queryType][fieldType] === selector }) .map(([fieldType, fieldNames]) => fieldNames) .flat() @@ -306,7 +334,7 @@ module.exports = { buildBoolean, buildAtomic, buildAtomicMain, - nestedMapAndFilter, + nestedFilterAndMap, selectFields, indexMapping } diff --git a/lib/resources.js b/lib/resources.js index 3d7ccf7f..ce1ef036 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -911,7 +911,6 @@ const buildElasticQuery = function (params) { const query = cqlQueryBuilder.buildEsQuery(params.q, request) return query } - console.log('request: ', request) const builder = ElasticQueryBuilder.forApiRequest(request) return builder.query.toJson() From d5ec597185d49be4ac5a3dea203054bdd246923c Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 11:53:44 -0400 Subject: [PATCH 75/82] Add check for whether query has fields before adding --- lib/elasticsearch/cql_query_builder.js | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js
b/lib/elasticsearch/cql_query_builder.js index c497ebad..542ad83b 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -106,7 +106,7 @@ function findTopWords (tree) { /** For an object where the values are arrays, apply the given filter and map - to each of the arrays + to each of the arrays. */ function nestedFilterAndMap (obj, filter, map) { return Object.assign({}, @@ -115,6 +115,14 @@ function nestedFilterAndMap (obj, filter, map) { ) ) } + +/** + Return truthy value if and only if one of the values is a non-empty array + */ +function hasFields (obj) { + return Object.values(obj).some(arr => arr.length) +} + /** build atomic: - identify the scope fields that match the term @@ -155,13 +163,15 @@ function buildAtomic ({ scope, relation, terms, term }) { field => field ) + console.log('holdingsFields: ', holdingsFields) + return { bool: { should: [ buildAtomicMain({ fields: bibFields, relation, terms, term }), - buildAtomicNested('items', { fields: itemFields, relation, terms, term }), - buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term }) - ] + (hasFields(itemFields) && buildAtomicNested('items', { fields: itemFields, relation, terms, term })), + (hasFields(holdingsFields) && buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term })) + ].filter(x => x) } } } From 4f26b201010d2a688c157d4f2cf3d6e5ad35e02b Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:21:56 -0400 Subject: [PATCH 76/82] Remove console log --- lib/elasticsearch/cql_query_builder.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 542ad83b..87ef7f84 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -163,8 +163,6 @@ function buildAtomic ({ scope, relation, terms, term }) { field => field ) - console.log('holdingsFields: ', holdingsFields) - return { 
bool: { should: [ From 94d0d610a14b81bc3262bd2b40ca34b3b7ffc94b Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:29:24 -0400 Subject: [PATCH 77/82] Fix tests --- test/fixtures/cql_fixtures.js | 253 +--------------------------------- 1 file changed, 6 insertions(+), 247 deletions(-) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 555e0a45..bc35e24a 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -33,12 +33,6 @@ const simpleAdjQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -82,12 +76,6 @@ const multiAdjQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -162,12 +150,6 @@ const prefixPhraseQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -247,32 +229,6 @@ const simpleAnyQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -416,36 +372,6 @@ const anyWithPrefixQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -525,32 +451,6 @@ const simpleAllQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { 
- must: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - must: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -617,9 +517,6 @@ const keywordQueryForBarcode = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -694,9 +591,6 @@ const keywordQueryForShelfMark = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -753,12 +647,6 @@ const keywordQueryForGeneralTerm = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -799,9 +687,6 @@ const identifierQuery = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -837,15 +722,6 @@ const binaryBooleanQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -860,15 +736,6 @@ const binaryBooleanQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -910,18 +777,6 @@ const ternaryBooleanQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -936,18 +791,6 @@ const ternaryBooleanQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -970,15 +813,6 @@ const ternaryBooleanQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] 
} @@ -1017,15 +851,6 @@ const queryWithParentheses = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1043,18 +868,6 @@ const queryWithParentheses = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1074,18 +887,6 @@ const queryWithParentheses = { } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1127,15 +928,6 @@ const negationQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1152,15 +944,6 @@ const negationQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1185,9 +968,7 @@ const dateAfterQuery = { query: { range: { 'dates.range': { gt: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1208,9 +989,7 @@ const dateBeforeQuery = { query: { range: { 'dates.range': { lt: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1231,9 +1010,7 @@ const dateBeforeOrOnQuery = { query: { range: { 'dates.range': { lte: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1254,9 +1031,7 @@ const dateAfterOrOnQuery = { query: { range: { 'dates.range': { gte: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + 
] ] } } @@ -1279,9 +1054,7 @@ const dateWithinQuery = { } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1304,9 +1077,7 @@ const dateEnclosesQuery = { } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1338,12 +1109,6 @@ const filterQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -1393,12 +1158,6 @@ const exactMatchQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } From e0e8f6ba4eb3befa53259753fd0b0ca8252b4dee Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:49:37 -0400 Subject: [PATCH 78/82] Add reversing strings in nested array --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 87b8c042..84815c35 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -94,7 +94,7 @@ function parseWithRightCql (string) { function parsedASTtoNestedArray (ast) { if (!ast.type.includes('query')) { - return ast.text + return reverseString(ast.text) } const childTypes = [ From d65a3d377cd9d3d4844842c567d2814409ce1936 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 16:56:39 -0400 Subject: [PATCH 79/82] Remove double reversing in display --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 84815c35..81f9aafc 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -124,7 +124,7 @@ function displayParsed (string) { ).join('\n') } } - return { parsed: 
parsedASTtoNestedArray(reverseAST(parsed)) } + return { parsed: parsedASTtoNestedArray(parsed) } } module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } From 330e170359e1af561c2309924f20a3ef9674cb50 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Mar 2026 12:09:47 -0400 Subject: [PATCH 80/82] Fix double nesting of should array for dates --- lib/elasticsearch/cql_query_builder.js | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 87ef7f84..0f10f9bf 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -302,18 +302,16 @@ function dateQueries ({ fields, relation, terms, term }) { break } - return [ - { - nested: { - path: 'dates', - query: { - range: { - 'dates.range': range - } + return { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range } } } - ] + } } function termQuery (field, term) { From b0f75851200764b5bb3e924b3e67bb98ba345c1d Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Mar 2026 12:12:15 -0400 Subject: [PATCH 81/82] Fix date test fixtures --- test/fixtures/cql_fixtures.js | 76 +++++++++++++++-------------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index bc35e24a..552333c0 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -961,14 +961,12 @@ const dateAfterQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gt: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } } - ] + } ] } } @@ -982,14 +980,12 @@ const dateBeforeQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lt: '1990' } } } - } + { + nested: { + 
path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } } - ] + } ] } } @@ -1003,14 +999,12 @@ const dateBeforeOrOnQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lte: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } } - ] + } ] } } @@ -1024,14 +1018,12 @@ const dateAfterOrOnQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gte: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } } - ] + } ] } } @@ -1045,16 +1037,14 @@ const dateWithinQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gte: '1990', lte: '2000' } } - } + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } } } - ] + } ] } } @@ -1068,16 +1058,14 @@ const dateEnclosesQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gt: '1990', lt: '2000' } } - } + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } } } - ] + } ] } } From c59f33e3aa3dad908262e9f4ccdf6cff297cc8c3 Mon Sep 17 00:00:00 2001 From: Ian O'Connor Date: Fri, 27 Mar 2026 14:33:27 -0400 Subject: [PATCH 82/82] fix merge --- lib/resources.js | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/lib/resources.js b/lib/resources.js index 43c1e4d5..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -196,45 +196,6 @@ module.exports = function (app, _private = null) { } resp = await ResourceResultsSerializer.serialize(resp, opts) - /** - * Given a params hash, returns an array of ES queries for fetching relevant aggregations. 
- */ - const aggregationQueriesForParams = (params) => { - // Build the complete set of distinct aggregation queries we need to run - // depending on active filters. We want: - // - one agg representing the counts for all properties _not_ used in filter - // - one agg each for each property that is used in a filter, but counts should exclude that filter - - // If the search query consists of only a single filter (or a couple - // filters of same type), just do a normal self-filtering aggregation - // for all properties: - if (!params.q && Object.keys(params.filters).length === 1) { - return [buildElasticAggregationsBody(params, Object.keys(AGGREGATIONS_SPEC))] - } - - // Build the standard aggregation: - const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) - // Aggregate on all properties that aren't involved in filters: - .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) - const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] - - // Now append all property-specific aggregation queries (one for each - // distinct property used in a filter): - return queries.concat( - Object.entries(params.filters || {}) - // Only consider filters that are also aggregations: - .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) - .map(([prop, values]) => { - const aggFilters = structuredClone(params.filters) - // For this aggregation, don't filter on namesake property: - delete aggFilters[prop] - - // Build query for single aggregation: - const modifiedParams = Object.assign({}, params, { filters: aggFilters }) - return buildElasticAggregationsBody(modifiedParams, [prop]) - }) - ) - } const relevanceReport = resp.itemListElement .map(makeRelevanceReport(params))