From 579eec58cbb9cc84e3316cd5cacd3a15c585596f Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 1 Dec 2025 11:15:02 -0500 Subject: [PATCH 01/54] Fix query structure --- lib/elasticsearch/config.js | 3 +- lib/elasticsearch/cql_grammar.js | 15 +++++ lib/elasticsearch/cql_query_builder.js | 90 ++++++++++++++++++++++++++ lib/resources.js | 15 ++++- package-lock.json | 10 +++ package.json | 1 + 6 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 lib/elasticsearch/cql_grammar.js create mode 100644 lib/elasticsearch/cql_query_builder.js diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index b63c3545..494dbb15 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -73,7 +73,8 @@ const SEARCH_SCOPES = { }, standard_number: { // We do custom field matching for this search-scope - } + }, + cql: {} } const FILTER_CONFIG = { diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js new file mode 100644 index 00000000..24cfb489 --- /dev/null +++ b/lib/elasticsearch/cql_grammar.js @@ -0,0 +1,15 @@ +const { Grammars } = require('ebnf') + +const cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "and" | "or" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " key | key + scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" + relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" + key ::= [a-z]* | '"' key '"' +` + +let cqlParser = new Grammars.W3C.Parser(cql) + +module.exports = { cqlParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js new file mode 100644 index 00000000..8ea7498e --- /dev/null +++ b/lib/elasticsearch/cql_query_builder.js @@ -0,0 +1,90 @@ +const { cqlParser } = require('./cql_grammar') +const ElasticQueryBuilder = require('./elastic-query-builder') +const ApiRequest = require('../api-request') + +function buildEsQuery (cqlQuery) { + const tree = cqlParser.getAST(cqlQuery) + console.log('tree: ', tree) + return buildEsQueryFromTree(tree) +} + +/** + this is mostly there but needs to handle exact strings + */ + +function buildEsQueryFromTree (tree) { + switch (tree.type) { + case 'query': + if (tree.children.length > 1) { + return buildBoolean( + buildEsQueryFromTree(tree.children[0]), + tree.children[1].text, + buildEsQueryFromTree(tree.children[2]) + ) + } else { + return buildEsQueryFromTree(tree.children[0]) + } + break + case 'sub_query': + return buildEsQueryFromTree(tree.children.length > 1 ? tree.children[1] : tree.children[0]) + break + case 'atomic_query': + let scope + let relation + let term + if (tree.children.length > 1) { + scope = tree.children[0].text + relation = tree.children[1].text + term = tree.children[2].text + } else { + scope = "all" + relation = "any" + term = tree.children[0].text + } + return buildAtomic(scope, relation, term) + break + default: + break + } +} + +function buildBoolean (queryOne, operator, queryTwo) { + console.log('building boolean ', queryOne, operator, queryTwo) + const esOperator = operator === 'and' ? 'must' : 'should' + return { + "bool": { + [esOperator]: [ + queryOne, + queryTwo + ] + } + } +} + +function buildAtomic (scope, relation, term) { + console.log('building atomic: ', scope, relation, term) + const request = ApiRequest.fromParams({ + q: term, + search_scope: scope + }) + const builder = ElasticQueryBuilder.forApiRequest(request) + // return { + // query: builder.query.toJson() + // } + return builder.query.toJson() + // return { + // "query": { + // "multi_match" : { + // "query": term, + // "fields": [ "subject", "message" ] + // } + // } + // } +} + +module.exports = { + buildEsQuery, + buildEsQueryFromTree, + buildBoolean, + buildAtomic +} diff --git a/lib/resources.js b/lib/resources.js index 57532f96..134abdad 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,6 +19,7 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') +const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -620,12 +621,19 @@ module.exports = function (app, _private = null) { let body = buildElasticBody(params) + console.log('body: ', body) + // Strip unnecessary _source fields body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) } - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + if (params.search_scope !== 'cql') { + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + } + + + console.log('body after inner: ', body) app.logger.debug('Resources#search', RESOURCES_INDEX, body) @@ -860,6 +868,11 @@ const buildElasticBody = function (params) { * @return {object} ES query object suitable to be POST'd to ES endpoint */ const buildElasticQuery = function (params) { + if (params.search_scope === 'cql') { + query = cqlQueryBuilder.buildEsQuery(params.q) + console.log('built cql query for query: ', params.q, 'query: ', query) + return query + } const request = ApiRequest.fromParams(params) const builder = ElasticQueryBuilder.forApiRequest(request) diff --git a/package-lock.json b/package-lock.json index 792258c5..193bd7d4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" @@ -9666,6 +9667,15 @@ "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "optional": true }, + "node_modules/ebnf": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ebnf/-/ebnf-1.9.1.tgz", + "integrity": "sha512-uW2UKSsuty9ANJ3YByIQE4ANkD8nqUPO7r6Fwcc1ADKPe9FRdcPpMl3VEput4JSvKBJ4J86npIC2MLP0pYkCuw==", + "license": "MIT", + "bin": { + "ebnf": "dist/bin.js" + } + }, "node_modules/ecc-jsbn": { "version": "0.1.2", "dev": true, diff --git a/package.json b/package.json index 435131f3..7ffe5b80 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" From ae28b7299c952f55a33e958688f43c335ea00139 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:00:39 -0500 Subject: [PATCH 02/54] Add more permissive key structure --- lib/elasticsearch/cql_grammar.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 24cfb489..0de4df11 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -7,7 +7,9 @@ const cql = ` atomic_query ::= scope " " relation " " key | key scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= [a-z]* | '"' key '"' + key ::= NON_WS_KEY | '"' KEYPHRASE '"' + KEYPHRASE ::= [^"]+ + NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ ` let cqlParser = new Grammars.W3C.Parser(cql) From 8b6000fd8a551be46f4ea074de60f40452021aaa Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:03:10 -0500 Subject: [PATCH 03/54] Remove console logs and commented code --- lib/elasticsearch/cql_query_builder.js | 14 -------------- lib/resources.js | 6 ------ 2 files changed, 20 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 8ea7498e..69f8fe01 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -4,7 +4,6 @@ const ApiRequest = require('../api-request') function buildEsQuery (cqlQuery) { const tree = cqlParser.getAST(cqlQuery) - console.log('tree: ', tree) return buildEsQueryFromTree(tree) } @@ -49,7 +48,6 @@ function buildEsQueryFromTree (tree) { } function buildBoolean (queryOne, operator, queryTwo) { - console.log('building boolean ', queryOne, operator, queryTwo) const esOperator = operator === 'and' ? 'must' : 'should' return { "bool": { @@ -62,24 +60,12 @@ function buildBoolean (queryOne, operator, queryTwo) { } function buildAtomic (scope, relation, term) { - console.log('building atomic: ', scope, relation, term) const request = ApiRequest.fromParams({ q: term, search_scope: scope }) const builder = ElasticQueryBuilder.forApiRequest(request) - // return { - // query: builder.query.toJson() - // } return builder.query.toJson() - // return { - // "query": { - // "multi_match" : { - // "query": term, - // "fields": [ "subject", "message" ] - // } - // } - // } } module.exports = { diff --git a/lib/resources.js b/lib/resources.js index 134abdad..9c5e13cc 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -621,8 +621,6 @@ module.exports = function (app, _private = null) { let body = buildElasticBody(params) - console.log('body: ', body) - // Strip unnecessary _source fields body._source = { excludes: EXCLUDE_FIELDS.concat(['items']) @@ -632,9 +630,6 @@ module.exports = function (app, _private = null) { body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) } - - console.log('body after inner: ', body) - app.logger.debug('Resources#search', RESOURCES_INDEX, body) return app.esClient.search(body) @@ -870,7 +865,6 @@ const buildElasticBody = function (params) { const buildElasticQuery = function (params) { if (params.search_scope === 'cql') { query = cqlQueryBuilder.buildEsQuery(params.q) - console.log('built cql query for query: ', params.q, 'query: ', query) return query } const request = ApiRequest.fromParams(params) From e5b61dc79e0a3fef407ea5f3c411297070264465 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 4 Dec 2025 16:40:12 -0500 Subject: [PATCH 04/54] Fix linter errors --- lib/elasticsearch/cql_grammar.js | 4 ++-- lib/elasticsearch/cql_query_builder.js | 12 +++++------- lib/resources.js | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 0de4df11..be8cceca 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,4 +1,4 @@ -const { Grammars } = require('ebnf') +const { Grammars } = require('ebnf') const cql = ` query ::= sub_query " " connective " " query | sub_query @@ -12,6 +12,6 @@ const cql = ` NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ ` -let cqlParser = new Grammars.W3C.Parser(cql) +const cqlParser = new Grammars.W3C.Parser(cql) module.exports = { cqlParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 69f8fe01..a26c7e38 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -23,11 +23,9 @@ function buildEsQueryFromTree (tree) { } else { return buildEsQueryFromTree(tree.children[0]) } - break case 'sub_query': return buildEsQueryFromTree(tree.children.length > 1 ? tree.children[1] : tree.children[0]) - break - case 'atomic_query': + case 'atomic_query': { let scope let relation let term @@ -36,12 +34,12 @@ function buildEsQueryFromTree (tree) { relation = tree.children[1].text term = tree.children[2].text } else { - scope = "all" - relation = "any" + scope = 'all' + relation = 'any' term = tree.children[0].text } return buildAtomic(scope, relation, term) - break + } default: break } @@ -50,7 +48,7 @@ function buildEsQueryFromTree (tree) { function buildBoolean (queryOne, operator, queryTwo) { const esOperator = operator === 'and' ? 'must' : 'should' return { - "bool": { + bool: { [esOperator]: [ queryOne, queryTwo diff --git a/lib/resources.js b/lib/resources.js index 9c5e13cc..aecf9658 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -864,7 +864,7 @@ const buildElasticBody = function (params) { */ const buildElasticQuery = function (params) { if (params.search_scope === 'cql') { - query = cqlQueryBuilder.buildEsQuery(params.q) + const query = cqlQueryBuilder.buildEsQuery(params.q) return query } const request = ApiRequest.fromParams(params) From 6ca7e33ecf84dc04a44b5a2d5c4237523a77cbe9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 12:17:15 -0500 Subject: [PATCH 05/54] Exclude parentheses in query term --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index be8cceca..c2b32d06 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -9,7 +9,7 @@ const cql = ` relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" key ::= NON_WS_KEY | '"' KEYPHRASE '"' KEYPHRASE ::= [^"]+ - NON_WS_KEY ::= [^#x20#x09#x0A#x0D"]+ + NON_WS_KEY ::= [^#x20#x09#x0A#x0D"()]+ ` const cqlParser = new Grammars.W3C.Parser(cql) From 39c09204a786813101e75350fc338dc38e510b63 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 12:35:28 -0500 Subject: [PATCH 06/54] Make keyphrase/non_ws_key lowercase --- lib/elasticsearch/cql_grammar.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index c2b32d06..17844b43 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -7,9 +7,9 @@ const cql = ` atomic_query ::= scope " " relation " " key | key scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= NON_WS_KEY | '"' KEYPHRASE '"' - KEYPHRASE ::= [^"]+ - NON_WS_KEY ::= [^#x20#x09#x0A#x0D"()]+ + key ::= non_ws_key | '"' keyphrase '"' + keyphrase ::= [^"]+ + non_ws_key ::= [^#x20#x09#x0A#x0D"()]+ ` const cqlParser = new Grammars.W3C.Parser(cql) From 6714e246c4b2f5a9e8bac3dcdb242d739187c689 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 18 Dec 2025 13:08:04 -0500 Subject: [PATCH 07/54] Change callNumber to callnumber to enable callnumber searches --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 17844b43..3cf0b63c 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -5,7 +5,7 @@ const cql = ` connective ::= "and" | "or" sub_query ::= atomic_query | "(" query ")" atomic_query ::= scope " " relation " " key | key - scope ::= "title" | "contributor" | "keyword" | "callNumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" + scope ::= "title" | "contributor" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" key ::= non_ws_key | '"' keyphrase '"' keyphrase ::= [^"]+ From cd4c24f249e6e78e9576b7f73b64b5eb37b430a4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 7 Jan 2026 13:10:23 -0500 Subject: [PATCH 08/54] Add finding text by key for atomic queries --- lib/elasticsearch/cql_query_builder.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index a26c7e38..f1f2d8c4 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -32,12 +32,12 @@ function buildEsQueryFromTree (tree) { if (tree.children.length > 1) { scope = tree.children[0].text relation = tree.children[1].text - term = tree.children[2].text } else { scope = 'all' relation = 'any' - term = tree.children[0].text } + term = tree.children.find(child => child.type === 'key').children[0].text + return buildAtomic(scope, relation, term) } default: From 8b3e4e17730eb87c123171e84308e819c2c6313e Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 13:34:07 -0500 Subject: [PATCH 09/54] Add initial bnf --- lib/elasticsearch/cql/index-mapping.js | 79 ++++++++++++++++++++++++++ lib/elasticsearch/cql_grammar.js | 40 +++++++++---- 2 files changed, 109 insertions(+), 10 deletions(-) create mode 100644 lib/elasticsearch/cql/index-mapping.js diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js new file mode 100644 index 00000000..40e96af1 --- /dev/null +++ b/lib/elasticsearch/cql/index-mapping.js @@ -0,0 +1,79 @@ +const INDEX_MAPPING = { + keyword: { + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded', + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) }, + // Try to detect shelfmark searches (e.g. JFD 16-5143) + { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ] + }, + title: { + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ] + }, + author: { + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] + }, + callnumber: {}, + identifier: {}, + subject: { + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] + }, + language: { field: ['language.id', 'language.label'] }, + date: {}, + series: { + fields: ['series', 'parallelSeries'] + }, + genre: { field: ['genreForm.raw'] }, + center: { field: ['buildingLocationIds'] }, + division: { field: ['collectionIds'] }, + format: { field: ['formatId'] } +} + +module.exports = { + INDEX_MAPPING +} diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 3cf0b63c..3f0c6fa2 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,17 +1,37 @@ const { Grammars } = require('ebnf') -const cql = ` +let cql = ` query ::= sub_query " " connective " " query | sub_query - connective ::= "and" | "or" - sub_query ::= atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " key | key - scope ::= "title" | "contributor" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "center" | "format" - relation ::= "any" | "adj" | "=" | "==" | "within" | "encloses" - key ::= non_ws_key | '"' keyphrase '"' - keyphrase ::= [^"]+ - non_ws_key ::= [^#x20#x09#x0A#x0D"()]+ + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' term '"' + term ::= escaped_char term | regular_char term | escaped_char | regular_char + regular_char ::= [^"\\\\] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + +let alt_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' TERM '"' + TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR + REGULAR_CHAR ::= [^"\\\\] + ESCAPED_CHAR ::= SLASH CHAR + SLASH ::= "\\\\" + CHAR ::= [a-z]|[^a-z] ` const cqlParser = new Grammars.W3C.Parser(cql) +const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) -module.exports = { cqlParser } +module.exports = { cqlParser, alt_cqlParser } From 87c1e32a4ba3dbb564fec79c8524e256d736d1e6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 13:34:26 -0500 Subject: [PATCH 10/54] Update packages --- package-lock.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package-lock.json b/package-lock.json index 5e52382b..d253d9a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3713,6 +3713,7 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, "optional": true, "engines": { "node": ">=14" From a55444dfd220bada1a02e7826514fc9b8316a1fa Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 30 Jan 2026 16:24:57 -0500 Subject: [PATCH 11/54] Add alternate grammars and comment for atomic --- lib/elasticsearch/cql/index-mapping.js | 9 +++++-- lib/elasticsearch/cql_grammar.js | 36 ++++++++++++++++++++++++++ lib/elasticsearch/cql_query_builder.js | 15 +++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 40e96af1..39eb0a22 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -58,8 +58,13 @@ const INDEX_MAPPING = { author: { fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] }, - callnumber: {}, - identifier: {}, + callnumber: { + fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + }, + identifier: { + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased' ], + term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] + }, subject: { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 3f0c6fa2..986399ca 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -31,6 +31,42 @@ let alt_cql = ` CHAR ::= [a-z]|[^a-z] ` +let word_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' phrase '"' + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^"\\\\#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + +let convenient_cql = ` + query ::= sub_query " " connective " " query | sub_query + connective ::= "AND" | "OR" + sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" + atomic_query ::= scope " " relation " " quoted_term | quoted_term | word + scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= '"' phrase '"' + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^"\\\\#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= "\\\\" + char ::= [a-z]|[^a-z] + +` + const cqlParser = new Grammars.W3C.Parser(cql) const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index f1f2d8c4..f11440ad 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -66,6 +66,21 @@ function buildAtomic (scope, relation, term) { return builder.query.toJson() } +/** + build atomic: + - identify the scope fields that match the term + - separate out into main, items, holdings + - boolean(main, items, holdings) + - items/holds = nested(items/holdings, main) + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ + module.exports = { buildEsQuery, buildEsQueryFromTree, From 05dfb3f11850e44297c26868f4332e7a4eb368b4 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 5 Feb 2026 15:52:11 -0500 Subject: [PATCH 12/54] Add reverseGrammar and related methods --- lib/elasticsearch/cql/index-mapping.js | 6 +- lib/elasticsearch/cql_grammar.js | 366 +++++++++++++++++++++---- lib/elasticsearch/cql_query_builder.js | 240 +++++++++++++--- test/cql_grammar.test.js | 112 ++++++++ 4 files changed, 626 insertions(+), 98 deletions(-) create mode 100644 test/cql_grammar.test.js diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 39eb0a22..f0f19a9a 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -1,4 +1,4 @@ -const INDEX_MAPPING = { +const indexMapping = { keyword: { fields: [ 'title', @@ -62,7 +62,7 @@ const INDEX_MAPPING = { fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] }, identifier: { - prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased' ], + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] }, subject: { @@ -80,5 +80,5 @@ const INDEX_MAPPING = { } module.exports = { - INDEX_MAPPING + indexMapping } diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 986399ca..aa3450d3 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,73 +1,323 @@ const { Grammars } = require('ebnf') -let cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' term '"' - term ::= escaped_char term | regular_char term | escaped_char | regular_char - regular_char ::= [^"\\\\] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +// let cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' term '"' +// term ::= escaped_char term | regular_char term | escaped_char | regular_char +// regular_char ::= [^"\\\\] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` +// +// let alt_cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' TERM '"' +// TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR +// REGULAR_CHAR ::= [^"\\\\] +// ESCAPED_CHAR ::= SLASH CHAR +// SLASH ::= "\\\\" +// CHAR ::= [a-z]|[^a-z] +// ` + +// let word_cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope optional_whitespace relation optional_whitespace quoted_term +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// optional_whitespace ::= whitespace | "" +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] + +// NEED to add some allowed whitespace before and after atomic queries +const ridic = ` + query ::= sub_query whitespace connective whitespace query | sub_query + connective ::= "TON DNA" | "DNA" | "RO" | "NOT" + sub_query ::= atomic_query | ")" query "(" + atomic_query ::= [a-z]+ + whitespace ::= [#x20#x09#x0A#x0D]+ ` -let alt_cql = ` - query ::= sub_query " " connective " " query | sub_query +// const cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND NOT" | "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope relation quoted_term +// scope ::= scope_term whitespace | scope_term +// relation ::= relation_term whitespace | relation_term +// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` + +// function reverseGrammar (grammar) { +// return grammar.split("\n").map(line => +// (line.split("::=").map(side => +// (side.split("|").map(disjunct => +// (disjunct.split(" ").map(word => +// (word => word.includes("\"") ? reverseString(word) : word) +// ).reverse().join(" ")) +// )).join("|") +// )).join("::=") +// ).join("\n") +// } + +function reverseGrammar (grammar) { + return grammar.split("\n") + .map(line => + (line.split("::=") + .map(side => + (side.split("|") + .map(dis => + (dis.split(" ") + .map(word => + (word.includes("\"") ? word.split("").reverse().join("") : word)) + .reverse().join(" ")) + ).join("|"))).join("::= "))).join("\n") +} + + + +const leftTest = ` + query ::= query connective sub_query | sub_query connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' TERM '"' - TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR - REGULAR_CHAR ::= [^"\\\\] - ESCAPED_CHAR ::= SLASH CHAR - SLASH ::= "\\\\" - CHAR ::= [a-z]|[^a-z] + sub_query ::= [a-z]+ ` -let word_cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' phrase '"' - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^"\\\\#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +const leftCql = ` + query ::= query whitespace connective whitespace sub_query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= phrase whitespace word | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= word escaped_char | word regular_char | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] +` +const cql = ` + query ::= sub_query whitespace connective whitespace query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= word whitespace phrase | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= escaped_char word | regular_char word | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] ` -let convenient_cql = ` - query ::= sub_query " " connective " " query | sub_query - connective ::= "AND" | "OR" - sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" - atomic_query ::= scope " " relation " " quoted_term | quoted_term | word - scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= '"' phrase '"' - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^"\\\\#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= "\\\\" - char ::= [a-z]|[^a-z] +// const cql = ` +// query ::= sub_query whitespace connective whitespace query | sub_query +// connective ::= "AND NOT" | "AND" | "OR" | "NOT" +// sub_query ::= atomic_query | "(" query ")" +// atomic_query ::= scope relation quoted_term +// scope ::= scope_term whitespace | scope_term +// relation ::= relation_term whitespace | relation_term +// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= quote phrase quote +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= [#x5c] +// char ::= [a-z]|[^a-z] +// quote ::= [#x22] +// +// ` -` +// const escaping = ` +// quoted_term ::= quote phrase quote +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= [#x5c] +// char ::= [a-z]|[^a-z] +// quote ::= [#x22] +// ` + +function simplifyRidic (ast) { + if (ast.type === 'atomic_query' || ast.type === 'connective') return ast.text + return ast.children.map(child => simplifyRidic(child)) +} + +function simplify (ast) { + switch (ast.type) { + case 'query': { + console.log('query') + const children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) + return children.length > 1 ? children : children[0] + } + case 'connective': + return ast.text + case 'sub_query': + return simplify(ast.children.find(child => child.type.includes('query'))) + case 'atomic_query': + return ast.children.map(child => simplify(child)) + case 'scope': + return simplify(ast.children.find(child => child.type.includes('scope_term'))) + case 'relation': + return simplify(ast.children.find(child => child.type.includes('relation_term'))) + case 'scope_term': + return ast.text + case 'relation_term': + return ast.text + case 'quoted_term': + return simplify(ast.children.find(child => child.type.includes('phrase'))) + case 'phrase': { + const word = ast.children.find(child => child.type === 'word') + const phrase = ast.children.find(child => child.type === 'phrase') + return [simplify(word)].concat(phrase ? simplify(phrase) : []) + } + case 'word': + return ast.text + default: + break + } +} + +function partialSimplify (tree) { + if (['phrase', 'relation_term', 'scope_term', 'connective'].includes(tree.type)) { + return tree.text + } + if (tree.type === 'sub_query') { + return [partialSimplify(tree.children.find(child => child.type.includes('query')))] + } + const simplifiedChildren = tree.children.map(child => partialSimplify(child)) + return simplifiedChildren.length === 1 ? simplifiedChildren[0]: simplifiedChildren + +} + +function rectifyTree (tree) { + // collect all the boolean queries that directly contain boolean queries + const toRotate = [] + const nodeQueue = [tree] + while (nodeQueue.length) { + let node = nodeQueue.shift() + if (node.type === 'query' && node.children.find(child => child.type === 'connective')) { + let rightChild = node.children.find(child => child.type === 'query') + if (rightChild && rightChild.children.find(child => child.type === 'connective')) { + toRotate.push(node) + } + } + node.children.forEach(child => {nodeQueue.push(child)}) + } + console.log('toRotate: ', toRotate) + toRotate.forEach(node => { + console.log('rotating: ', node) + console.dir(tree, {depth: null}) + const lastChild = node.children[node.children.length - 1] + const grandChild = lastChild.children[0] + node.children[node.children.length - 1] = grandChild + lastChild[0] = node + }) +} + +function rectifySkeleton (tree) { + const connectives = ["AND", "OR"] + const toRotate = [] + const nodeQueue = [tree] + while (nodeQueue.length) { + let node = nodeQueue.shift() + if (node.find(child => connectives.includes(child))) { + let rightChild = node[node.length - 1] + if (rightChild.find(child => connectives.includes(child))) { + toRotate.push(node) + } + } + node.forEach(child => {nodeQueue.push(child)}) + } + console.log('toRotate: ', toRotate) + toRotate.forEach(node => { + console.log('rotating: ', node) + console.dir(tree, {depth: null}) + const lastChild = node.pop() + node.push(lastChild.shift()) + lastChild.unshift(node) + }) +} + +function reverseString (string) { + return string.split("").reverse().join("") +} + +function reverseAST (tree) { + tree.text = reverseString(tree.text) + tree.children = tree.children.map(child => reverseAST(child)).reverse() + return tree +} + +// let convenient_cql = ` +// query ::= sub_query " " connective " " query | sub_query +// connective ::= "AND" | "OR" +// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" +// atomic_query ::= scope " " relation " " quoted_term | quoted_term | word +// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" +// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" +// quoted_term ::= '"' phrase '"' +// phrase ::= word whitespace phrase | word +// whitespace ::= [#x20#x09#x0A#x0D]+ +// word ::= escaped_char word | regular_char word | escaped_char | regular_char +// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] +// escaped_char ::= slash char +// slash ::= "\\\\" +// char ::= [a-z]|[^a-z] +// +// ` +// const cqlParser = new Grammars.W3C.Parser(cql) +// const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) const cqlParser = new Grammars.W3C.Parser(cql) -const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) +const ridicParser = new Grammars.W3C.Parser(ridic) +// const escapingParser = new Grammars.W3C.Parser(escaping) -module.exports = { cqlParser, alt_cqlParser } +module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars }//, escapingParser } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index f11440ad..44fc4e2e 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,10 +1,16 @@ -const { cqlParser } = require('./cql_grammar') +const { cqlParser, rectifyTree } = require('./cql_grammar') const ElasticQueryBuilder = require('./elastic-query-builder') const ApiRequest = require('../api-request') +const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - const tree = cqlParser.getAST(cqlQuery) - return buildEsQueryFromTree(tree) + // const tree = cqlParser.getAST(cqlQuery) + // return buildEsQueryFromTree(tree) + return buildEsQueryFromTree( + rectifyTree( + cqlParser.getAST(cqlQuery) + ) + ) } /** @@ -14,56 +20,79 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { case 'query': - if (tree.children.length > 1) { - return buildBoolean( - buildEsQueryFromTree(tree.children[0]), - tree.children[1].text, - buildEsQueryFromTree(tree.children[2]) - ) - } else { - return buildEsQueryFromTree(tree.children[0]) + queries = tree.children.filter(child.type.contains('query')) + connectives = tree.children.filter(child => child.type === 'connective') + if (connectives.length) { + return buildBoolean(connectives[0], queries) } + return buildEsQueryFromTree(queries[0]) case 'sub_query': - return buildEsQueryFromTree(tree.children.length > 1 ? tree.children[1] : tree.children[0]) + const query = tree.children.filter(child => child.type.contains('query'))[0] + return buildEsQueryFromTree(query) case 'atomic_query': { - let scope - let relation - let term - if (tree.children.length > 1) { - scope = tree.children[0].text - relation = tree.children[1].text - } else { - scope = 'all' - relation = 'any' - } - term = tree.children.find(child => child.type === 'key').children[0].text - - return buildAtomic(scope, relation, term) + const { scope, relation, term, terms } = atomicQueryParams(query) + return buildAtomic(scope, relation, term, terms) } default: break } } -function buildBoolean (queryOne, operator, queryTwo) { +function buildBoolean (operator, queries) { + if (operator === "NOT") return buildNegation(queries) const esOperator = operator === 'and' ? 'must' : 'should' return { bool: { - [esOperator]: [ - queryOne, - queryTwo - ] + [esOperator]: queries.map(query => buildEsQueryFromTree(query)) + } + } +} + +function buildNegation (queries) { + return { + bool: { + must: [buildEsQueryFromTree(queries[0])], + must_not: [buildEsQueryFromTree(queries[1])] } } } -function buildAtomic (scope, relation, term) { - const request = ApiRequest.fromParams({ - q: term, - search_scope: scope - }) - const builder = ElasticQueryBuilder.forApiRequest(request) - return builder.query.toJson() +function atomicQueryParams (query) { + return { + scope: query.find(child => child.type === 'scope'), + relation: query.find(child => child.type === 'relation'), + term: findTopPhrase(query), + terms: findTopWords(query) + } +} + +function findTopPhrase (tree) { + if (tree.type === 'phrase') return tree.text + const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) + return topPhrases.length ? topPhrases[0] : null +} + +function findTopWords (tree) { + if (tree.type === 'word') return [tree.text] + return tree.children.map(child => findTopWords(child)).flatten() +} + +// function buildAtomic (scope, relation, term) { +// const request = ApiRequest.fromParams({ +// q: term, +// search_scope: scope +// }) +// const builder = ElasticQueryBuilder.forApiRequest(request) +// return builder.query.toJson() +// } + +function nestedMapAndFilter (obj, filter, map) { + return Object.assign({}, + ...Object.entries( + obj + ).filter(filter) // need to modify this to get full query + .map(map) + ) } /** @@ -81,6 +110,143 @@ function buildAtomic (scope, relation, term) { - put all terms in term matches with term fields */ + function buildAtomic (scope, relation, terms, term) { + const allFields = nestedMapAndFilter( + indexMapping[scope], + (k,v) => typeof v === 'string' || v.on(terms), + ([k,v]) => ({[k] : typeof v === 'string' ? v : v.field}) + ) + + const bibFields = nestedMapAndFilter( + allFields, + ([k, v]) => !['items, holdings'].any(prefix => k.startsWith(prefix)), + ([k, v]) => ({[k]: v}) + ) + + const itemFields = nestedMapAndFilter( + allFields, + ([k, v]) => k.startsWith('items'), + ([k, v]) => ({[k]: v}) + ) + + const holdingsFields = nestedMapAndFilter( + allFields, + ([k, v]) => k.startsWith('holdings'), + ([k, v]) => ({[k]: v}) + ) + + return { + bool: { // should this start with query? + should: [ + buildAtomicMain(bibFields, relation, terms, term), + buildAtomicNested('items', itemFields, relation, terms, term), + buildAtomicNested('holdings', holdingsFields, relation, terms, term) + ] + } + } + } + + function buildAtomicNested(name, fields, relation, terms, term) { + return { + nested: { + path: name, + query: buildAtomicMain(fields, relation, terms, term) + } + } + } + + /** + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ + function buildAtomicMain (fields, relation, terms, term) { + return { + bool: { + should: [ + ...anyAllQueries(fields, relation, terms, term), + ...adjEqQueries(fields, relation, terms, term), + ...termQueriesForTermFields(fields, relation, terms, term), + ...prefixQueriesForPrefixFields(fields, relation, terms, term) + ] + } + } + } + + function anyAllQueries (fields, relation, terms, term) { + if (!['any', 'all'].contains(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), + ...(terms.filter(term => term.startsWith('^')).map(term => prefixQuery(fieldsToUse, term.slice(1)))) + ] + } + + function adjEqQueries (fields, relation, terms, term) { + if (!['=', 'adj'].contains(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + term.startsWith('^') ? + phrasePrefixQuery(fieldsToUse, term.slice(1)) : + phraseQuery(fieldsToUse, term) + ] + } + + function prefixQueriesForPrefixFields (fields, relation, terms, term) { + if (!fields.prefix) return [] + return fields.prefix.map(field => prefixQuery(field, term)) + } + + function termQueriesForTermFields (fields, relation, terms, term) { + if (!fields.term) return [] + return fields.term.map(field => termQuery(field, term)) + } + + function termQuery (field, term) { + return { "term" : { [field] : term } } + } + + function prefixQuery (field, term) { + return { "prefix" : { [field] : term } } + } + + function multiMatch (fields, relation, terms) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "cross_fields", + "operator": relation === "any" ? "or" : "and" + } + } + } + + function phrasePrefixQuery (fields, term) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "phrase_prefix" + } + } + } + + function phraseQuery (fields, term) { + return { + "multi_match": { + "query" : term, + "fields": fields, + "type": "phrase" + } + } + } + + + module.exports = { buildEsQuery, buildEsQueryFromTree, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js new file mode 100644 index 00000000..0a5439b9 --- /dev/null +++ b/test/cql_grammar.test.js @@ -0,0 +1,112 @@ +const { expect } = require('chai') + +const { cqlParser, simplify, rectifyTree } = require('../lib/elasticsearch/cql_grammar') + + +function validateAtomicQuery(parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal("query") + expect(parsed.children.length).to.equal(1) + const subQuery = parsed.children[0] + expect(subQuery.type).to.equal("sub_query") + expect(subQuery.children.length).to.equal(1) + const atomicQuery = subQuery.children[0] + expect(atomicQuery.type).to.equal("atomic_query") + const scopeNode = atomicQuery.children.find(child => child.type === "scope") + const scopeTerm = scopeNode.children.find(child => child.type === "scope_term") + expect(scopeTerm.text).to.equal(scope) + const relationNode = atomicQuery.children.find(child => child.type === "relation") + const relationTerm = relationNode.children.find(child => child.type === "relation_term") + expect(relationTerm.text).to.equal(relation) + const quotedTermNode = atomicQuery.children.find(child => child.type === "quoted_term") + expect(quotedTermNode.text).to.equal(quotedTerm) +} + +function validateBooleanQuery(parsed, expected) { + +} + +describe.only('CQL Grammar', function () { + describe('parsing queries', function () { + it('parses atomic queries', function () { + validateAtomicQuery(cqlParser.getAST("title=\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") + validateAtomicQuery(cqlParser.getAST("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + }) + + it('allows whitespace variants', function () { + validateAtomicQuery(cqlParser.getAST("title =\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title= \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + }) + + it('correctly escapes escape characters', function () { + validateAtomicQuery(cqlParser.getAST("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") + validateAtomicQuery(cqlParser.getAST("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + }) + + it('identifies words correctly', function () { + const parsed = cqlParser.getAST("keyword adj \"A multiword keyword\"") + const words = [] + let nodes = [parsed] + while (nodes.length) { + let node = nodes.shift() + if (node.type === "word") { + words.push(node.text) + } else { + nodes = nodes.concat(node.children) + } + } + const expectedWords = ["A", "multiword", "keyword"] + words.forEach(word => { + expect(expectedWords).to.include(word) + }) + expect(words.length).to.equal(3) + }) + + it('parses boolean queries', function () { + expect(simplify(cqlParser.getAST( + "title=\"dogs\" AND keyword=\"cats\"" + ))).to.deep.equal( + [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] + ) + + expect(simplify(cqlParser.getAST( + "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" + ))).to.deep.equal( + [ + [ + "title", "=", ["dogs"] + ], + "AND", + [ + [ + "keyword", "=", ["cats"] + ], + "OR", + [ + "author", "adj", ["Bird"] + ] + ] + ] + ) + }) + + it('parses queries with parentheses', function () { + expect() + .to.deep.equal( + [ + [ [ 'title', '=', ['dogs'] ], 'AND', [ 'keyword', '=', ['cats'] ] ], + 'OR', + [ 'author', 'adj', [ 'Bird' ] ] + ] + ) + }) + }) +}) From f373383f480421667efa42ab6291703b06e674f3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 5 Feb 2026 15:59:06 -0500 Subject: [PATCH 13/54] Apparently working left associating cql --- lib/elasticsearch/cql_grammar.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index aa3450d3..250b1287 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -132,6 +132,8 @@ const leftCql = ` quote ::= [#x22] ` +const rightCql = reverseGrammar(leftCql) + const cql = ` query ::= sub_query whitespace connective whitespace query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" @@ -318,6 +320,15 @@ function reverseAST (tree) { // const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) const cqlParser = new Grammars.W3C.Parser(cql) const ridicParser = new Grammars.W3C.Parser(ridic) +const rightCqlParser = new Grammars.W3C.Parser(rightCql) + +function parseRight (string, parser) { + return reverseAST(parser.getAST(reverseString(string))) +} + +function parseWithRightCql (string) { + return parseRight(string, rightCqlParser) +} // const escapingParser = new Grammars.W3C.Parser(escaping) -module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars }//, escapingParser } +module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars, parseRight, parseWithRightCql }//, escapingParser } From 951a8e89f65065589923ab8d49fffa12bc3566ff Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 11:54:02 -0500 Subject: [PATCH 14/54] Clean up grammar file --- lib/elasticsearch/cql_grammar.js | 242 +------------------------------ 1 file changed, 1 insertion(+), 241 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 250b1287..c9f73d6c 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,95 +1,5 @@ const { Grammars } = require('ebnf') -// let cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' term '"' -// term ::= escaped_char term | regular_char term | escaped_char | regular_char -// regular_char ::= [^"\\\\] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` -// -// let alt_cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' TERM '"' -// TERM ::= ESCAPED_CHAR TERM | REGULAR_CHAR TERM | ESCAPED_CHAR | REGULAR_CHAR -// REGULAR_CHAR ::= [^"\\\\] -// ESCAPED_CHAR ::= SLASH CHAR -// SLASH ::= "\\\\" -// CHAR ::= [a-z]|[^a-z] -// ` - -// let word_cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope optional_whitespace relation optional_whitespace quoted_term -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// optional_whitespace ::= whitespace | "" -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] - -// NEED to add some allowed whitespace before and after atomic queries - -const ridic = ` - query ::= sub_query whitespace connective whitespace query | sub_query - connective ::= "TON DNA" | "DNA" | "RO" | "NOT" - sub_query ::= atomic_query | ")" query "(" - atomic_query ::= [a-z]+ - whitespace ::= [#x20#x09#x0A#x0D]+ -` - -// const cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND NOT" | "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope relation quoted_term -// scope ::= scope_term whitespace | scope_term -// relation ::= relation_term whitespace | relation_term -// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` - -// function reverseGrammar (grammar) { -// return grammar.split("\n").map(line => -// (line.split("::=").map(side => -// (side.split("|").map(disjunct => -// (disjunct.split(" ").map(word => -// (word => word.includes("\"") ? reverseString(word) : word) -// ).reverse().join(" ")) -// )).join("|") -// )).join("::=") -// ).join("\n") -// } - function reverseGrammar (grammar) { return grammar.split("\n") .map(line => @@ -104,14 +14,6 @@ function reverseGrammar (grammar) { ).join("|"))).join("::= "))).join("\n") } - - -const leftTest = ` - query ::= query connective sub_query | sub_query - connective ::= "AND" | "OR" - sub_query ::= [a-z]+ -` - const leftCql = ` query ::= query whitespace connective whitespace sub_query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" @@ -134,64 +36,6 @@ const leftCql = ` const rightCql = reverseGrammar(leftCql) -const cql = ` - query ::= sub_query whitespace connective whitespace query | sub_query - connective ::= "AND NOT" | "AND" | "OR" | "NOT" - sub_query ::= atomic_query | "(" query ")" - atomic_query ::= scope relation quoted_term - scope ::= scope_term whitespace | scope_term - relation ::= relation_term whitespace | relation_term - scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" - quoted_term ::= quote phrase quote - phrase ::= word whitespace phrase | word - whitespace ::= [#x20#x09#x0A#x0D]+ - word ::= escaped_char word | regular_char word | escaped_char | regular_char - regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] - escaped_char ::= slash char - slash ::= [#x5c] - char ::= [a-z]|[^a-z] - quote ::= [#x22] -` - -// const cql = ` -// query ::= sub_query whitespace connective whitespace query | sub_query -// connective ::= "AND NOT" | "AND" | "OR" | "NOT" -// sub_query ::= atomic_query | "(" query ")" -// atomic_query ::= scope relation quoted_term -// scope ::= scope_term whitespace | scope_term -// relation ::= relation_term whitespace | relation_term -// scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= quote phrase quote -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= [#x5c] -// char ::= [a-z]|[^a-z] -// quote ::= [#x22] -// -// ` - -// const escaping = ` -// quoted_term ::= quote phrase quote -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^#5c#x22#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= [#x5c] -// char ::= [a-z]|[^a-z] -// quote ::= [#x22] -// ` - -function simplifyRidic (ast) { - if (ast.type === 'atomic_query' || ast.type === 'connective') return ast.text - return ast.children.map(child => simplifyRidic(child)) -} - function simplify (ast) { switch (ast.type) { case 'query': { @@ -227,67 +71,6 @@ function simplify (ast) { } } -function partialSimplify (tree) { - if (['phrase', 'relation_term', 'scope_term', 'connective'].includes(tree.type)) { - return tree.text - } - if (tree.type === 'sub_query') { - return [partialSimplify(tree.children.find(child => child.type.includes('query')))] - } - const simplifiedChildren = tree.children.map(child => partialSimplify(child)) - return simplifiedChildren.length === 1 ? simplifiedChildren[0]: simplifiedChildren - -} - -function rectifyTree (tree) { - // collect all the boolean queries that directly contain boolean queries - const toRotate = [] - const nodeQueue = [tree] - while (nodeQueue.length) { - let node = nodeQueue.shift() - if (node.type === 'query' && node.children.find(child => child.type === 'connective')) { - let rightChild = node.children.find(child => child.type === 'query') - if (rightChild && rightChild.children.find(child => child.type === 'connective')) { - toRotate.push(node) - } - } - node.children.forEach(child => {nodeQueue.push(child)}) - } - console.log('toRotate: ', toRotate) - toRotate.forEach(node => { - console.log('rotating: ', node) - console.dir(tree, {depth: null}) - const lastChild = node.children[node.children.length - 1] - const grandChild = lastChild.children[0] - node.children[node.children.length - 1] = grandChild - lastChild[0] = node - }) -} - -function rectifySkeleton (tree) { - const connectives = ["AND", "OR"] - const toRotate = [] - const nodeQueue = [tree] - while (nodeQueue.length) { - let node = nodeQueue.shift() - if (node.find(child => connectives.includes(child))) { - let rightChild = node[node.length - 1] - if (rightChild.find(child => connectives.includes(child))) { - toRotate.push(node) - } - } - node.forEach(child => {nodeQueue.push(child)}) - } - console.log('toRotate: ', toRotate) - toRotate.forEach(node => { - console.log('rotating: ', node) - console.dir(tree, {depth: null}) - const lastChild = node.pop() - node.push(lastChild.shift()) - lastChild.unshift(node) - }) -} - function reverseString (string) { return string.split("").reverse().join("") } @@ -298,28 +81,6 @@ function reverseAST (tree) { return tree } -// let convenient_cql = ` -// query ::= sub_query " " connective " " query | sub_query -// connective ::= "AND" | "OR" -// sub_query ::= atomic_query | "NOT " atomic_query | "(" query ")" -// atomic_query ::= scope " " relation " " quoted_term | quoted_term | word -// scope ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" -// relation ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" -// quoted_term ::= '"' phrase '"' -// phrase ::= word whitespace phrase | word -// whitespace ::= [#x20#x09#x0A#x0D]+ -// word ::= escaped_char word | regular_char word | escaped_char | regular_char -// regular_char ::= [^"\\\\#x20#x09#x0A#x0D] -// escaped_char ::= slash char -// slash ::= "\\\\" -// char ::= [a-z]|[^a-z] -// -// ` - -// const cqlParser = new Grammars.W3C.Parser(cql) -// const alt_cqlParser = new Grammars.W3C.Parser(alt_cql) -const cqlParser = new Grammars.W3C.Parser(cql) -const ridicParser = new Grammars.W3C.Parser(ridic) const rightCqlParser = new Grammars.W3C.Parser(rightCql) function parseRight (string, parser) { @@ -329,6 +90,5 @@ function parseRight (string, parser) { function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -// const escapingParser = new Grammars.W3C.Parser(escaping) -module.exports = { cqlParser, simplify, rectifyTree, partialSimplify, ridicParser, simplifyRidic, reverseAST, leftTest, reverseGrammar, leftCql, Grammars, parseRight, parseWithRightCql }//, escapingParser } +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql } From 3a03d816e30016aa1b9cce8a9a6c0cc0d42bc603 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 11:54:38 -0500 Subject: [PATCH 15/54] Use parseWithRightCql in query builder and tests --- lib/elasticsearch/cql_query_builder.js | 8 +--- test/cql_grammar.test.js | 57 ++++++++++++++------------ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 44fc4e2e..6c2f5505 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,15 +1,11 @@ -const { cqlParser, rectifyTree } = require('./cql_grammar') +const { parseWithRightCql } = require('./cql_grammar') const ElasticQueryBuilder = require('./elastic-query-builder') const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - // const tree = cqlParser.getAST(cqlQuery) - // return buildEsQueryFromTree(tree) return buildEsQueryFromTree( - rectifyTree( - cqlParser.getAST(cqlQuery) - ) + parseWithRightCql(cqlQuery) ) } diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index 0a5439b9..cb3ee69d 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -1,6 +1,6 @@ const { expect } = require('chai') -const { cqlParser, simplify, rectifyTree } = require('../lib/elasticsearch/cql_grammar') +const { cqlParser, simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') function validateAtomicQuery(parsed, scope, relation, quotedTerm) { @@ -28,31 +28,31 @@ function validateBooleanQuery(parsed, expected) { describe.only('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { - validateAtomicQuery(cqlParser.getAST("title=\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") - validateAtomicQuery(cqlParser.getAST("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") }) it('allows whitespace variants', function () { - validateAtomicQuery(cqlParser.getAST("title =\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title= \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(cqlParser.getAST("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("title =\"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title= \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") }) it('correctly escapes escape characters', function () { - validateAtomicQuery(cqlParser.getAST("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") - validateAtomicQuery(cqlParser.getAST("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + validateAtomicQuery(parseWithRightCql("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") + validateAtomicQuery(parseWithRightCql("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") }) it('identifies words correctly', function () { - const parsed = cqlParser.getAST("keyword adj \"A multiword keyword\"") + const parsed = parseWithRightCql("keyword adj \"A multiword keyword\"") const words = [] let nodes = [parsed] while (nodes.length) { @@ -71,41 +71,44 @@ describe.only('CQL Grammar', function () { }) it('parses boolean queries', function () { - expect(simplify(cqlParser.getAST( + expect(simplify(parseWithRightCql( "title=\"dogs\" AND keyword=\"cats\"" ))).to.deep.equal( [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] ) - expect(simplify(cqlParser.getAST( + expect(simplify(parseWithRightCql( "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" ))).to.deep.equal( [ - [ - "title", "=", ["dogs"] - ], - "AND", [ [ - "keyword", "=", ["cats"] + "title", "=", ["dogs"] ], + "AND", + [ + "keyword", "=", ["cats"] + ] + ], "OR", [ "author", "adj", ["Bird"] ] ] - ] ) }) it('parses queries with parentheses', function () { - expect() + expect(simplify(parseWithRightCql( + "title=\"dogs\" AND (keyword=\"cats\" OR author adj \"Bird\")" + ))) .to.deep.equal( [ - [ [ 'title', '=', ['dogs'] ], 'AND', [ 'keyword', '=', ['cats'] ] ], + [ 'title', '=', ['dogs'] ], 'AND', [[ 'keyword', '=', ['cats'] ], 'OR', [ 'author', 'adj', [ 'Bird' ] ] ] + ] ) }) }) From 195282892d36962475516b2d4c5973054395a8e6 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 6 Feb 2026 11:59:21 -0500 Subject: [PATCH 16/54] Remove console log and commented code' --- lib/elasticsearch/cql_grammar.js | 1 - lib/elasticsearch/cql_query_builder.js | 9 --------- 2 files changed, 10 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index c9f73d6c..ef87de73 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -39,7 +39,6 @@ const rightCql = reverseGrammar(leftCql) function simplify (ast) { switch (ast.type) { case 'query': { - console.log('query') const children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) return children.length > 1 ? children : children[0] } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 6c2f5505..ed57fbfc 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -73,15 +73,6 @@ function findTopWords (tree) { return tree.children.map(child => findTopWords(child)).flatten() } -// function buildAtomic (scope, relation, term) { -// const request = ApiRequest.fromParams({ -// q: term, -// search_scope: scope -// }) -// const builder = ElasticQueryBuilder.forApiRequest(request) -// return builder.query.toJson() -// } - function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, ...Object.entries( From 3a838575532cd1e9483d3171d0a5654413285bc9 Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 9 Feb 2026 15:39:00 -0500 Subject: [PATCH 17/54] Fix some param passing and start adding query tests --- lib/elasticsearch/cql_query_builder.js | 114 +++++++++++++++---------- test/cql_query_builder.test.js | 13 +++ test/fixtures/cql_fixtures.js | 78 +++++++++++++++++ 3 files changed, 158 insertions(+), 47 deletions(-) create mode 100644 test/cql_query_builder.test.js create mode 100644 test/fixtures/cql_fixtures.js diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index ed57fbfc..a026a229 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -4,9 +4,12 @@ const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { - return buildEsQueryFromTree( - parseWithRightCql(cqlQuery) - ) + return { + query: + buildEsQueryFromTree( + parseWithRightCql(cqlQuery) + ) + } } /** @@ -16,18 +19,17 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { case 'query': - queries = tree.children.filter(child.type.contains('query')) + queries = tree.children.filter(child => child.type.includes('query')) connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { return buildBoolean(connectives[0], queries) } return buildEsQueryFromTree(queries[0]) case 'sub_query': - const query = tree.children.filter(child => child.type.contains('query'))[0] + const query = tree.children.filter(child => child.type.includes('query'))[0] return buildEsQueryFromTree(query) case 'atomic_query': { - const { scope, relation, term, terms } = atomicQueryParams(query) - return buildAtomic(scope, relation, term, terms) + return buildAtomic(atomicQueryParams(tree)) } default: break @@ -53,35 +55,51 @@ function buildNegation (queries) { } } -function atomicQueryParams (query) { +/** + A convienience method that collect the scope, relation, the full query (i.e term), and + all the separate words in the query (i.e. the terms) + */ +function atomicQueryParams (atomic_query) { return { - scope: query.find(child => child.type === 'scope'), - relation: query.find(child => child.type === 'relation'), - term: findTopPhrase(query), - terms: findTopWords(query) + scope: atomic_query.children.find(child => child.type === 'scope').text, + relation: atomic_query.children.find(child => child.type === 'relation').text, + term: findTopPhrase(atomic_query), + terms: findTopWords(atomic_query) } } +/** + Find the highest (i.e. most inclusive) phrase node and return its text + Ex: if the query was keyword="Hamlet Shakespeare", there will be phrase nodes + for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare + */ function findTopPhrase (tree) { if (tree.type === 'phrase') return tree.text const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) return topPhrases.length ? topPhrases[0] : null } +/** + Return a list of all the words that aren't fragments of larger words + E.g. Hamlet Shakespeare => [Hamlet, Shakespeare], and doesn't include the text + of word nodes for H, Ha, Ham, etc... + */ function findTopWords (tree) { if (tree.type === 'word') return [tree.text] - return tree.children.map(child => findTopWords(child)).flatten() + return tree.children.map(child => findTopWords(child)).flat() } +/** + For an object where the keys are arrays, apply the given filter and map + to each of the arrays + */ function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, - ...Object.entries( - obj - ).filter(filter) // need to modify this to get full query - .map(map) - ) + ...(Object.entries(obj) + .map(([k,v]) => ({[k]: v.filter(filter).map(map) })) + ) + ) } - /** build atomic: - identify the scope fields that match the term @@ -97,47 +115,47 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in term matches with term fields */ - function buildAtomic (scope, relation, terms, term) { + function buildAtomic ({scope, relation, terms, term}) { const allFields = nestedMapAndFilter( indexMapping[scope], - (k,v) => typeof v === 'string' || v.on(terms), - ([k,v]) => ({[k] : typeof v === 'string' ? v : v.field}) + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? field : field.field) ) const bibFields = nestedMapAndFilter( allFields, - ([k, v]) => !['items, holdings'].any(prefix => k.startsWith(prefix)), - ([k, v]) => ({[k]: v}) + (field) => !['items, holdings'].some(prefix => field.startsWith(prefix)), + field => field ) const itemFields = nestedMapAndFilter( allFields, - ([k, v]) => k.startsWith('items'), - ([k, v]) => ({[k]: v}) + (field) => field.startsWith('items'), + field => field ) const holdingsFields = nestedMapAndFilter( allFields, - ([k, v]) => k.startsWith('holdings'), - ([k, v]) => ({[k]: v}) + (field) => field.startsWith('holdings'), + field => field ) return { - bool: { // should this start with query? + bool: { should: [ - buildAtomicMain(bibFields, relation, terms, term), - buildAtomicNested('items', itemFields, relation, terms, term), - buildAtomicNested('holdings', holdingsFields, relation, terms, term) + buildAtomicMain({fields: bibFields, relation, terms, term}), + buildAtomicNested('items', {fields: itemFields, relation, terms, term}), + buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term}) ] } } } - function buildAtomicNested(name, fields, relation, terms, term) { + function buildAtomicNested(name, {fields, relation, terms, term}) { return { nested: { path: name, - query: buildAtomicMain(fields, relation, terms, term) + query: buildAtomicMain({fields, relation, terms, term}) } } } @@ -151,21 +169,21 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in prefix match with prefix fields - put all terms in term matches with term fields */ - function buildAtomicMain (fields, relation, terms, term) { + function buildAtomicMain ({fields, relation, terms, term}) { return { bool: { should: [ - ...anyAllQueries(fields, relation, terms, term), - ...adjEqQueries(fields, relation, terms, term), - ...termQueriesForTermFields(fields, relation, terms, term), - ...prefixQueriesForPrefixFields(fields, relation, terms, term) + ...anyAllQueries({fields, relation, terms, term}), + ...adjEqQueries({fields, relation, terms, term}), + ...termQueriesForTermFields({fields, relation, terms, term}), + ...prefixQueriesForPrefixFields({fields, relation, terms, term}) ] } } } - function anyAllQueries (fields, relation, terms, term) { - if (!['any', 'all'].contains(relation)) { return [] } + function anyAllQueries ({fields, relation, terms, term}) { + if (!['any', 'all'].includes(relation)) { return [] } const fieldsToUse = fields.fields return [ multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), @@ -173,8 +191,8 @@ function nestedMapAndFilter (obj, filter, map) { ] } - function adjEqQueries (fields, relation, terms, term) { - if (!['=', 'adj'].contains(relation)) { return [] } + function adjEqQueries ({fields, relation, terms, term}) { + if (!['=', 'adj'].includes(relation)) { return [] } const fieldsToUse = fields.fields return [ term.startsWith('^') ? @@ -183,12 +201,12 @@ function nestedMapAndFilter (obj, filter, map) { ] } - function prefixQueriesForPrefixFields (fields, relation, terms, term) { + function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { if (!fields.prefix) return [] return fields.prefix.map(field => prefixQuery(field, term)) } - function termQueriesForTermFields (fields, relation, terms, term) { + function termQueriesForTermFields ({fields, relation, terms, term}) { if (!fields.term) return [] return fields.term.map(field => termQuery(field, term)) } @@ -204,7 +222,7 @@ function nestedMapAndFilter (obj, filter, map) { function multiMatch (fields, relation, terms) { return { "multi_match": { - "query" : term, + "query" : terms.join(" "), "fields": fields, "type": "cross_fields", "operator": relation === "any" ? "or" : "and" @@ -238,5 +256,7 @@ module.exports = { buildEsQuery, buildEsQueryFromTree, buildBoolean, - buildAtomic + buildAtomic, + buildAtomicMain, + nestedMapAndFilter } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js new file mode 100644 index 00000000..3d924c62 --- /dev/null +++ b/test/cql_query_builder.test.js @@ -0,0 +1,13 @@ +const { expect } = require('chai') + +const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const { simpleAnyQuery } = require('./fixtures/cql_fixtures') + +// describe('CQL Query Builder', function () { +// it('Simple = query', function () { +// expect(buildEsQuery("title=\"Hamlet\"")) +// .to.deep.equal( +// simpleAnyQuery +// ) +// }) +// }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js new file mode 100644 index 00000000..b8318005 --- /dev/null +++ b/test/fixtures/cql_fixtures.js @@ -0,0 +1,78 @@ +const simpleAnyQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +module.exports = { + simpleAnyQuery +} From d2109f529ceea95a2beb40622531990dd67da5b7 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 10 Feb 2026 12:00:15 -0500 Subject: [PATCH 18/54] Add tests for atomic queries and some small corrections --- lib/elasticsearch/cql_query_builder.js | 15 +- test/cql_grammar.test.js | 2 +- test/cql_query_builder.test.js | 93 +++- test/fixtures/cql_fixtures.js | 732 ++++++++++++++++++++++++- 4 files changed, 823 insertions(+), 19 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index a026a229..0054f5e3 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -61,8 +61,8 @@ function buildNegation (queries) { */ function atomicQueryParams (atomic_query) { return { - scope: atomic_query.children.find(child => child.type === 'scope').text, - relation: atomic_query.children.find(child => child.type === 'relation').text, + scope: atomic_query.children.find(child => child.type === 'scope').text.trim(), + relation: atomic_query.children.find(child => child.type === 'relation').text.trim(), term: findTopPhrase(atomic_query), terms: findTopWords(atomic_query) } @@ -124,7 +124,7 @@ function nestedMapAndFilter (obj, filter, map) { const bibFields = nestedMapAndFilter( allFields, - (field) => !['items, holdings'].some(prefix => field.startsWith(prefix)), + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), field => field ) @@ -187,8 +187,8 @@ function nestedMapAndFilter (obj, filter, map) { const fieldsToUse = fields.fields return [ multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => prefixQuery(fieldsToUse, term.slice(1)))) - ] + ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) + ].filter(q => q) } function adjEqQueries ({fields, relation, terms, term}) { @@ -198,7 +198,7 @@ function nestedMapAndFilter (obj, filter, map) { term.startsWith('^') ? phrasePrefixQuery(fieldsToUse, term.slice(1)) : phraseQuery(fieldsToUse, term) - ] + ].filter(q => q) } function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { @@ -220,6 +220,7 @@ function nestedMapAndFilter (obj, filter, map) { } function multiMatch (fields, relation, terms) { + if (!fields) return return { "multi_match": { "query" : terms.join(" "), @@ -231,6 +232,7 @@ function nestedMapAndFilter (obj, filter, map) { } function phrasePrefixQuery (fields, term) { + if (!fields) return return { "multi_match": { "query" : term, @@ -241,6 +243,7 @@ function nestedMapAndFilter (obj, filter, map) { } function phraseQuery (fields, term) { + if (!fields) return return { "multi_match": { "query" : term, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index cb3ee69d..38eafd70 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -25,7 +25,7 @@ function validateBooleanQuery(parsed, expected) { } -describe.only('CQL Grammar', function () { +describe('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 3d924c62..088070e0 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -1,13 +1,86 @@ const { expect } = require('chai') const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') -const { simpleAnyQuery } = require('./fixtures/cql_fixtures') - -// describe('CQL Query Builder', function () { -// it('Simple = query', function () { -// expect(buildEsQuery("title=\"Hamlet\"")) -// .to.deep.equal( -// simpleAnyQuery -// ) -// }) -// }) +const { + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery +} = require('./fixtures/cql_fixtures') + +describe.only('CQL Query Builder', function () { + it('Simple = query', function () { + expect(buildEsQuery("title=\"Hamlet\"")) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple adj query', function () { + expect(buildEsQuery("title adj \"Hamlet\"")) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple any query', function () { + expect(buildEsQuery("title any \"Hamlet Othello\"")) + .to.deep.equal( + simpleAnyQuery + ) + }) + + it('Simple all query', function () { + expect(buildEsQuery("title all \"Hamlet Othello\"")) + .to.deep.equal( + simpleAllQuery + ) + }) + + it('Prefix phrase query', function () { + expect(buildEsQuery("title = \"^The Tragedy of Hamlet, Prince of Denmark\"")) + .to.deep.equal( + prefixPhraseQuery + ) + }) + + it('Prefix queries mixed into any query', function () { + expect(buildEsQuery("title any \"^Tragedy ^Comedy Hamlet Othello\"")) + .to.deep.equal( + anyWithPrefixQuery + ) + }) + + it('Keyword query for barcode', function () { + expect(buildEsQuery("keyword = \"123456\"")) + .to.deep.equal( + keywordQueryForBarcode + ) + }) + + it('Keyword query for shelfMark', function () { + expect(buildEsQuery("keyword = \"B 12\"")) + .to.deep.equal( + keywordQueryForShelfMark + ) + }) + + it('Keyword query for general term', function () { + expect(buildEsQuery("keyword = \"Hamlet\"")) + .to.deep.equal( + keywordQueryForGeneralTerm + ) + }) + + it('Identifier query', function () { + expect(buildEsQuery("identifier = \"b1234\"")) + .to.deep.equal( + identifierQuery + ) + }) +}) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index b8318005..b5fb21eb 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,4 +1,4 @@ -const simpleAnyQuery = { +const simpleAdjQuery = { "query": { "bool": { "should": [ @@ -73,6 +73,734 @@ const simpleAnyQuery = { } } +const prefixPhraseQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "The Tragedy of Hamlet, Prince of Denmark", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + } + ] + } + } +} + +const simpleAnyQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "or" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + } + ] + } + } + } + } + ] + } + } +} + +const anyWithPrefixQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + }, + { + "multi_match": { + "query": "Comedy", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "phrase_prefix" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [], + "type": "phrase_prefix" + } + }, + { + "multi_match": { + "query": "Comedy", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "or" + } + }, + { + "multi_match": { + "query": "Tragedy", + "fields": [], + "type": "phrase_prefix" + } + }, + { + "multi_match": { + "query": "Comedy", + "fields": [], + "type": "phrase_prefix" + } + } + ] + } + } + } + } + ] + } + } +} + +const simpleAllQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [ + "title", + "title.folded", + "titleAlt.folded", + "uniformTitle.folded", + "titleDisplay.folded", + "seriesStatement.folded", + "contentsTitle.folded", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelSeriesStatement.folded", + "parallelTitleAlt.folded", + "parallelCreatorLiteral.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle" + ], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet Othello", + "fields": [], + "type": "cross_fields", + "operator": "and" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForBarcode = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [ + "items.idBarcode" + ], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "123456", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForShelfMark = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [ + "items.shelfMark" + ], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "B 12", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const keywordQueryForGeneralTerm = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [ + "title", + "title.folded", + "description.foldedStemmed", + "subjectLiteral", + "subjectLiteral.folded", + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "note.label.foldedStemmed", + "publisherLiteral.folded", + "seriesStatement.folded", + "titleAlt.folded", + "titleDisplay.folded", + "contentsTitle.folded", + "tableOfContents.folded", + "genreForm", + "donor.folded", + "parallelTitle.folded", + "parallelTitleDisplay.folded", + "parallelTitleAlt.folded", + "parallelSeriesStatement.folded", + "parallelCreatorLiteral.folded", + "parallelPublisher", + "parallelPublisherLiteral", + "uniformTitle.folded", + "parallelUniformTitle", + "formerTitle", + "addedAuthorTitle", + "placeOfPublication.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Hamlet", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } +} + +const identifierQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "term": { + "uri": "b1234" + } + }, + { + "term": { + "idIsbn.clean": "b1234" + } + }, + { + "term": { + "idIssn.clean": "b1234" + } + }, + { + "prefix": { + "identifierV2.value": "b1234" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "term": { + "items.idBarcode": "b1234" + } + }, + { + "prefix": { + "items.shelfMark.keywordLowercased": "b1234" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [] + } + } + } + } + ] + } + } +} + + module.exports = { - simpleAnyQuery + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery } From baf803c0b99d37d6dc13fef66a16061b23fbb5dc Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 10 Feb 2026 12:56:34 -0500 Subject: [PATCH 19/54] Add initial boolean tests --- lib/elasticsearch/cql/index-mapping.js | 10 +- lib/elasticsearch/cql_query_builder.js | 4 +- test/cql_query_builder.test.js | 30 +- test/fixtures/cql_fixtures.js | 514 ++++++++++++++++++++++++- 4 files changed, 549 insertions(+), 9 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index f0f19a9a..758deddc 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -68,15 +68,15 @@ const indexMapping = { subject: { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, - language: { field: ['language.id', 'language.label'] }, + language: { fields: ['language.id', 'language.label'] }, date: {}, series: { fields: ['series', 'parallelSeries'] }, - genre: { field: ['genreForm.raw'] }, - center: { field: ['buildingLocationIds'] }, - division: { field: ['collectionIds'] }, - format: { field: ['formatId'] } + genre: { fields: ['genreForm.raw'] }, + center: { fields: ['buildingLocationIds'] }, + division: { fields: ['collectionIds'] }, + format: { fields: ['formatId'] } } module.exports = { diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 0054f5e3..d2fa32e9 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -22,7 +22,7 @@ function buildEsQueryFromTree (tree) { queries = tree.children.filter(child => child.type.includes('query')) connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { - return buildBoolean(connectives[0], queries) + return buildBoolean(connectives[0].text, queries) } return buildEsQueryFromTree(queries[0]) case 'sub_query': @@ -38,7 +38,7 @@ function buildEsQueryFromTree (tree) { function buildBoolean (operator, queries) { if (operator === "NOT") return buildNegation(queries) - const esOperator = operator === 'and' ? 'must' : 'should' + const esOperator = operator === 'AND' ? 'must' : 'should' return { bool: { [esOperator]: queries.map(query => buildEsQueryFromTree(query)) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 088070e0..14f8e47b 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -10,7 +10,10 @@ const { keywordQueryForBarcode, keywordQueryForShelfMark, keywordQueryForGeneralTerm, - identifierQuery + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses } = require('./fixtures/cql_fixtures') describe.only('CQL Query Builder', function () { @@ -83,4 +86,29 @@ describe.only('CQL Query Builder', function () { identifierQuery ) }) + + it('Binary boolean query', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\"")) + .to.deep.equal( + binaryBooleanQuery + ) + }) + + it('Ternary boolean query', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\" OR genre = \"tragedy\"")) + .to.deep.equal( + ternaryBooleanQuery + ) + }) + + it('Boolean query with parentheses', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND (language = \"English\" OR genre = \"tragedy\")")) + .to.deep.equal( + queryWithParentheses + ) + }) + + it('Query with NOT') + + it('Query with AND NOT') }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index b5fb21eb..e4183d3e 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -792,6 +792,515 @@ const identifierQuery = { } } +const binaryBooleanQuery = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + +const ternaryBooleanQuery = { + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [ + "genreForm.raw" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + +const queryWithParentheses = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + }, + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [ + "genreForm.raw" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "tragedy", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } + ] + } + } +} module.exports = { simpleAdjQuery, @@ -802,5 +1311,8 @@ module.exports = { keywordQueryForBarcode, keywordQueryForShelfMark, keywordQueryForGeneralTerm, - identifierQuery + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses } From 8842a881fea2b438741f3c25ad4450df35bbd7ac Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 11 Feb 2026 14:07:31 -0500 Subject: [PATCH 20/54] Add tests for negation --- test/cql_query_builder.test.js | 17 ++++- test/fixtures/cql_fixtures.js | 133 ++++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 4 deletions(-) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 14f8e47b..33f088d4 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -13,7 +13,8 @@ const { identifierQuery, binaryBooleanQuery, ternaryBooleanQuery, - queryWithParentheses + queryWithParentheses, + negationQuery } = require('./fixtures/cql_fixtures') describe.only('CQL Query Builder', function () { @@ -108,7 +109,17 @@ describe.only('CQL Query Builder', function () { ) }) - it('Query with NOT') + it('Query with NOT', function () { + expect(buildEsQuery("author = \"Shakespeare\" NOT language = \"English\"")) + .to.deep.equal( + negationQuery + ) + }) - it('Query with AND NOT') + it('Query with AND NOT', function () { + expect(buildEsQuery("author = \"Shakespeare\" AND NOT language = \"English\"")) + .to.deep.equal( + negationQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index e4183d3e..101ff7be 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1302,6 +1302,136 @@ const queryWithParentheses = { } } +const negationQuery = { + "query": { + "bool": { + "must": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [ + "creatorLiteral", + "creatorLiteral.folded", + "contributorLiteral.folded", + "parallelCreatorLiteral.folded", + "parallelContributorLiteral.folded" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "Shakespeare", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ], + "must_not": [ + { + "bool": { + "should": [ + { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [ + "language.id", + "language.label" + ], + "type": "phrase" + } + } + ] + } + }, + { + "nested": { + "path": "items", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + }, + { + "nested": { + "path": "holdings", + "query": { + "bool": { + "should": [ + { + "multi_match": { + "query": "English", + "fields": [], + "type": "phrase" + } + } + ] + } + } + } + } + ] + } + } + ] + } + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1314,5 +1444,6 @@ module.exports = { identifierQuery, binaryBooleanQuery, ternaryBooleanQuery, - queryWithParentheses + queryWithParentheses, + negationQuery } From fd8ff377447f249ddf14c7be557f858859b47781 Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 11 Feb 2026 14:30:10 -0500 Subject: [PATCH 21/54] Fix linting/tests/small errors --- lib/elasticsearch/cql_grammar.js | 16 +- lib/elasticsearch/cql_query_builder.js | 250 ++-- test/cql_grammar.test.js | 95 +- test/cql_query_builder.test.js | 32 +- test/fixtures/cql_fixtures.js | 1570 ++++++++++++------------ 5 files changed, 978 insertions(+), 985 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index ef87de73..9d7700bb 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -1,17 +1,17 @@ const { Grammars } = require('ebnf') function reverseGrammar (grammar) { - return grammar.split("\n") + return grammar.split('\n') .map(line => - (line.split("::=") + (line.split('::=') .map(side => - (side.split("|") + (side.split('|') .map(dis => - (dis.split(" ") + (dis.split(' ') .map(word => - (word.includes("\"") ? word.split("").reverse().join("") : word)) - .reverse().join(" ")) - ).join("|"))).join("::= "))).join("\n") + (word.includes('"') ? word.split('').reverse().join('') : word)) + .reverse().join(' ')) + ).join('|'))).join('::= '))).join('\n') } const leftCql = ` @@ -71,7 +71,7 @@ function simplify (ast) { } function reverseString (string) { - return string.split("").reverse().join("") + return string.split('').reverse().join('') } function reverseAST (tree) { diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index d2fa32e9..fc5643eb 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,6 +1,4 @@ const { parseWithRightCql } = require('./cql_grammar') -const ElasticQueryBuilder = require('./elastic-query-builder') -const ApiRequest = require('../api-request') const { indexMapping } = require('./cql/index-mapping') function buildEsQuery (cqlQuery) { @@ -18,16 +16,18 @@ function buildEsQuery (cqlQuery) { function buildEsQueryFromTree (tree) { switch (tree.type) { - case 'query': - queries = tree.children.filter(child => child.type.includes('query')) - connectives = tree.children.filter(child => child.type === 'connective') + case 'query': { + const queries = tree.children.filter(child => child.type.includes('query')) + const connectives = tree.children.filter(child => child.type === 'connective') if (connectives.length) { return buildBoolean(connectives[0].text, queries) } return buildEsQueryFromTree(queries[0]) - case 'sub_query': + } + case 'sub_query': { const query = tree.children.filter(child => child.type.includes('query'))[0] return buildEsQueryFromTree(query) + } case 'atomic_query': { return buildAtomic(atomicQueryParams(tree)) } @@ -37,7 +37,7 @@ function buildEsQueryFromTree (tree) { } function buildBoolean (operator, queries) { - if (operator === "NOT") return buildNegation(queries) + if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries) const esOperator = operator === 'AND' ? 'must' : 'should' return { bool: { @@ -59,12 +59,12 @@ function buildNegation (queries) { A convienience method that collect the scope, relation, the full query (i.e term), and all the separate words in the query (i.e. the terms) */ -function atomicQueryParams (atomic_query) { +function atomicQueryParams (atomicQuery) { return { - scope: atomic_query.children.find(child => child.type === 'scope').text.trim(), - relation: atomic_query.children.find(child => child.type === 'relation').text.trim(), - term: findTopPhrase(atomic_query), - terms: findTopWords(atomic_query) + scope: atomicQuery.children.find(child => child.type === 'scope').text.trim(), + relation: atomicQuery.children.find(child => child.type === 'relation').text.trim(), + term: findTopPhrase(atomicQuery), + terms: findTopWords(atomicQuery) } } @@ -96,9 +96,9 @@ function findTopWords (tree) { function nestedMapAndFilter (obj, filter, map) { return Object.assign({}, ...(Object.entries(obj) - .map(([k,v]) => ({[k]: v.filter(filter).map(map) })) - ) + .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) ) + ) } /** build atomic: @@ -115,52 +115,52 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in term matches with term fields */ - function buildAtomic ({scope, relation, terms, term}) { - const allFields = nestedMapAndFilter( - indexMapping[scope], - field => typeof field === 'string' || field.on(term), - field => (typeof field === 'string' ? field : field.field) - ) +function buildAtomic ({ scope, relation, terms, term }) { + const allFields = nestedMapAndFilter( + indexMapping[scope], + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? field : field.field) + ) - const bibFields = nestedMapAndFilter( - allFields, - (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), - field => field - ) + const bibFields = nestedMapAndFilter( + allFields, + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), + field => field + ) - const itemFields = nestedMapAndFilter( - allFields, - (field) => field.startsWith('items'), - field => field - ) + const itemFields = nestedMapAndFilter( + allFields, + (field) => field.startsWith('items'), + field => field + ) - const holdingsFields = nestedMapAndFilter( - allFields, - (field) => field.startsWith('holdings'), - field => field - ) + const holdingsFields = nestedMapAndFilter( + allFields, + (field) => field.startsWith('holdings'), + field => field + ) - return { - bool: { - should: [ - buildAtomicMain({fields: bibFields, relation, terms, term}), - buildAtomicNested('items', {fields: itemFields, relation, terms, term}), - buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term}) - ] - } + return { + bool: { + should: [ + buildAtomicMain({ fields: bibFields, relation, terms, term }), + buildAtomicNested('items', { fields: itemFields, relation, terms, term }), + buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term }) + ] } - } + } +} - function buildAtomicNested(name, {fields, relation, terms, term}) { - return { - nested: { - path: name, - query: buildAtomicMain({fields, relation, terms, term}) - } - } - } +function buildAtomicNested (name, { fields, relation, terms, term }) { + return { + nested: { + path: name, + query: buildAtomicMain({ fields, relation, terms, term }) + } + } +} - /** +/** - main: - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields @@ -169,91 +169,89 @@ function nestedMapAndFilter (obj, filter, map) { - put all terms in prefix match with prefix fields - put all terms in term matches with term fields */ - function buildAtomicMain ({fields, relation, terms, term}) { - return { - bool: { - should: [ - ...anyAllQueries({fields, relation, terms, term}), - ...adjEqQueries({fields, relation, terms, term}), - ...termQueriesForTermFields({fields, relation, terms, term}), - ...prefixQueriesForPrefixFields({fields, relation, terms, term}) - ] - } - } - } +function buildAtomicMain ({ fields, relation, terms, term }) { + return { + bool: { + should: [ + ...anyAllQueries({ fields, relation, terms, term }), + ...adjEqQueries({ fields, relation, terms, term }), + ...termQueriesForTermFields({ fields, relation, terms, term }), + ...prefixQueriesForPrefixFields({ fields, relation, terms, term }) + ] + } + } +} - function anyAllQueries ({fields, relation, terms, term}) { - if (!['any', 'all'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) - ].filter(q => q) - } +function anyAllQueries ({ fields, relation, terms, term }) { + if (!['any', 'all'].includes(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), + ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) + ].filter(q => q) +} - function adjEqQueries ({fields, relation, terms, term}) { - if (!['=', 'adj'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - term.startsWith('^') ? - phrasePrefixQuery(fieldsToUse, term.slice(1)) : - phraseQuery(fieldsToUse, term) - ].filter(q => q) - } +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', 'adj'].includes(relation)) { return [] } + const fieldsToUse = fields.fields + return [ + term.startsWith('^') + ? phrasePrefixQuery(fieldsToUse, term.slice(1)) + : phraseQuery(fieldsToUse, term) + ].filter(q => q) +} - function prefixQueriesForPrefixFields ({fields, relation, terms, term}) { - if (!fields.prefix) return [] - return fields.prefix.map(field => prefixQuery(field, term)) - } +function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { + if (!fields.prefix) return [] + return fields.prefix.map(field => prefixQuery(field, term)) +} - function termQueriesForTermFields ({fields, relation, terms, term}) { - if (!fields.term) return [] - return fields.term.map(field => termQuery(field, term)) - } +function termQueriesForTermFields ({ fields, relation, terms, term }) { + if (!fields.term) return [] + return fields.term.map(field => termQuery(field, term)) +} - function termQuery (field, term) { - return { "term" : { [field] : term } } - } +function termQuery (field, term) { + return { term: { [field]: term } } +} - function prefixQuery (field, term) { - return { "prefix" : { [field] : term } } - } +function prefixQuery (field, term) { + return { prefix: { [field]: term } } +} - function multiMatch (fields, relation, terms) { +function multiMatch (fields, relation, terms) { if (!fields) return - return { - "multi_match": { - "query" : terms.join(" "), - "fields": fields, - "type": "cross_fields", - "operator": relation === "any" ? "or" : "and" - } - } - } + return { + multi_match: { + query: terms.join(' '), + fields, + type: 'cross_fields', + operator: relation === 'any' ? 'or' : 'and' + } + } +} - function phrasePrefixQuery (fields, term) { +function phrasePrefixQuery (fields, term) { if (!fields) return - return { - "multi_match": { - "query" : term, - "fields": fields, - "type": "phrase_prefix" - } - } - } + return { + multi_match: { + query: term, + fields, + type: 'phrase_prefix' + } + } +} - function phraseQuery (fields, term) { +function phraseQuery (fields, term) { if (!fields) return - return { - "multi_match": { - "query" : term, - "fields": fields, - "type": "phrase" - } - } - } - - + return { + multi_match: { + query: term, + fields, + type: 'phrase' + } + } +} module.exports = { buildEsQuery, diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index 38eafd70..24d5d5d6 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -1,69 +1,64 @@ const { expect } = require('chai') -const { cqlParser, simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') +const { simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') - -function validateAtomicQuery(parsed, scope, relation, quotedTerm) { - expect(parsed.type).to.equal("query") +function validateAtomicQuery (parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal('query') expect(parsed.children.length).to.equal(1) const subQuery = parsed.children[0] - expect(subQuery.type).to.equal("sub_query") + expect(subQuery.type).to.equal('sub_query') expect(subQuery.children.length).to.equal(1) const atomicQuery = subQuery.children[0] - expect(atomicQuery.type).to.equal("atomic_query") - const scopeNode = atomicQuery.children.find(child => child.type === "scope") - const scopeTerm = scopeNode.children.find(child => child.type === "scope_term") + expect(atomicQuery.type).to.equal('atomic_query') + const scopeNode = atomicQuery.children.find(child => child.type === 'scope') + const scopeTerm = scopeNode.children.find(child => child.type === 'scope_term') expect(scopeTerm.text).to.equal(scope) - const relationNode = atomicQuery.children.find(child => child.type === "relation") - const relationTerm = relationNode.children.find(child => child.type === "relation_term") + const relationNode = atomicQuery.children.find(child => child.type === 'relation') + const relationTerm = relationNode.children.find(child => child.type === 'relation_term') expect(relationTerm.text).to.equal(relation) - const quotedTermNode = atomicQuery.children.find(child => child.type === "quoted_term") + const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term') expect(quotedTermNode.text).to.equal(quotedTerm) } -function validateBooleanQuery(parsed, expected) { - -} - -describe('CQL Grammar', function () { +describe('CQL Grammar', function () { describe('parsing queries', function () { it('parses atomic queries', function () { - validateAtomicQuery(parseWithRightCql("title=\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("keyword any \"hamlet shakespeare\""), "keyword", "any", "\"hamlet shakespeare\"") - validateAtomicQuery(parseWithRightCql("subject all \"hamlet shakespeare\""), "subject", "all", "\"hamlet shakespeare\"") + validateAtomicQuery(parseWithRightCql('title="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('keyword any "hamlet shakespeare"'), 'keyword', 'any', '"hamlet shakespeare"') + validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"') }) it('allows whitespace variants', function () { - validateAtomicQuery(parseWithRightCql("title =\"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title= \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("title = \"hamlet\""), "title", "=", "\"hamlet\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") - validateAtomicQuery(parseWithRightCql("author adj \"shakespeare\""), "author", "adj", "\"shakespeare\"") + validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') }) it('correctly escapes escape characters', function () { - validateAtomicQuery(parseWithRightCql("keyword=\"Notes on \\\"The Underground\\\"\""), "keyword", "=", "\"Notes on \\\"The Underground\\\"\"") - validateAtomicQuery(parseWithRightCql("title=\"This title ends in a slash \\\\\""), "title", "=", "\"This title ends in a slash \\\\\"") + validateAtomicQuery(parseWithRightCql('keyword="Notes on \\"The Underground\\""'), 'keyword', '=', '"Notes on \\"The Underground\\""') + validateAtomicQuery(parseWithRightCql('title="This title ends in a slash \\\\"'), 'title', '=', '"This title ends in a slash \\\\"') }) it('identifies words correctly', function () { - const parsed = parseWithRightCql("keyword adj \"A multiword keyword\"") + const parsed = parseWithRightCql('keyword adj "A multiword keyword"') const words = [] let nodes = [parsed] while (nodes.length) { - let node = nodes.shift() - if (node.type === "word") { + const node = nodes.shift() + if (node.type === 'word') { words.push(node.text) } else { nodes = nodes.concat(node.children) } } - const expectedWords = ["A", "multiword", "keyword"] + const expectedWords = ['A', 'multiword', 'keyword'] words.forEach(word => { expect(expectedWords).to.include(word) }) @@ -72,43 +67,43 @@ describe('CQL Grammar', function () { it('parses boolean queries', function () { expect(simplify(parseWithRightCql( - "title=\"dogs\" AND keyword=\"cats\"" + 'title="dogs" AND keyword="cats"' ))).to.deep.equal( - [ [ 'title', '=', [ 'dogs' ] ], 'AND', [ 'keyword', '=', [ 'cats' ] ] ] + [['title', '=', ['dogs']], 'AND', ['keyword', '=', ['cats']]] ) expect(simplify(parseWithRightCql( - "title=\"dogs\" AND keyword=\"cats\" OR author adj \"Bird\"" + 'title="dogs" AND keyword="cats" OR author adj "Bird"' ))).to.deep.equal( [ [ [ - "title", "=", ["dogs"] + 'title', '=', ['dogs'] ], - "AND", + 'AND', [ - "keyword", "=", ["cats"] + 'keyword', '=', ['cats'] ] ], - "OR", - [ - "author", "adj", ["Bird"] - ] + 'OR', + [ + 'author', 'adj', ['Bird'] ] + ] ) }) it('parses queries with parentheses', function () { expect(simplify(parseWithRightCql( - "title=\"dogs\" AND (keyword=\"cats\" OR author adj \"Bird\")" + 'title="dogs" AND (keyword="cats" OR author adj "Bird")' ))) .to.deep.equal( [ - [ 'title', '=', ['dogs'] ], 'AND', [[ 'keyword', '=', ['cats'] ], - 'OR', - [ 'author', 'adj', [ 'Bird' ] ] + ['title', '=', ['dogs']], 'AND', [['keyword', '=', ['cats']], + 'OR', + ['author', 'adj', ['Bird']] + ] ] - ] ) }) }) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 33f088d4..0dfdc599 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -17,107 +17,107 @@ const { negationQuery } = require('./fixtures/cql_fixtures') -describe.only('CQL Query Builder', function () { +describe('CQL Query Builder', function () { it('Simple = query', function () { - expect(buildEsQuery("title=\"Hamlet\"")) + expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery ) }) it('Simple adj query', function () { - expect(buildEsQuery("title adj \"Hamlet\"")) + expect(buildEsQuery('title adj "Hamlet"')) .to.deep.equal( simpleAdjQuery ) }) it('Simple any query', function () { - expect(buildEsQuery("title any \"Hamlet Othello\"")) + expect(buildEsQuery('title any "Hamlet Othello"')) .to.deep.equal( simpleAnyQuery ) }) it('Simple all query', function () { - expect(buildEsQuery("title all \"Hamlet Othello\"")) + expect(buildEsQuery('title all "Hamlet Othello"')) .to.deep.equal( simpleAllQuery ) }) it('Prefix phrase query', function () { - expect(buildEsQuery("title = \"^The Tragedy of Hamlet, Prince of Denmark\"")) + expect(buildEsQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"')) .to.deep.equal( prefixPhraseQuery ) }) it('Prefix queries mixed into any query', function () { - expect(buildEsQuery("title any \"^Tragedy ^Comedy Hamlet Othello\"")) + expect(buildEsQuery('title any "^Tragedy ^Comedy Hamlet Othello"')) .to.deep.equal( anyWithPrefixQuery ) }) it('Keyword query for barcode', function () { - expect(buildEsQuery("keyword = \"123456\"")) + expect(buildEsQuery('keyword = "123456"')) .to.deep.equal( keywordQueryForBarcode ) }) it('Keyword query for shelfMark', function () { - expect(buildEsQuery("keyword = \"B 12\"")) + expect(buildEsQuery('keyword = "B 12"')) .to.deep.equal( keywordQueryForShelfMark ) }) it('Keyword query for general term', function () { - expect(buildEsQuery("keyword = \"Hamlet\"")) + expect(buildEsQuery('keyword = "Hamlet"')) .to.deep.equal( keywordQueryForGeneralTerm ) }) it('Identifier query', function () { - expect(buildEsQuery("identifier = \"b1234\"")) + expect(buildEsQuery('identifier = "b1234"')) .to.deep.equal( identifierQuery ) }) it('Binary boolean query', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" AND language = "English"')) .to.deep.equal( binaryBooleanQuery ) }) it('Ternary boolean query', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND language = \"English\" OR genre = \"tragedy\"")) + expect(buildEsQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"')) .to.deep.equal( ternaryBooleanQuery ) }) it('Boolean query with parentheses', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND (language = \"English\" OR genre = \"tragedy\")")) + expect(buildEsQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")')) .to.deep.equal( queryWithParentheses ) }) it('Query with NOT', function () { - expect(buildEsQuery("author = \"Shakespeare\" NOT language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" NOT language = "English"')) .to.deep.equal( negationQuery ) }) it('Query with AND NOT', function () { - expect(buildEsQuery("author = \"Shakespeare\" AND NOT language = \"English\"")) + expect(buildEsQuery('author = "Shakespeare" AND NOT language = "English"')) .to.deep.equal( negationQuery ) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 101ff7be..3c702c1f 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,48 +1,48 @@ const simpleAdjQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -51,16 +51,16 @@ const simpleAdjQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -74,50 +74,50 @@ const simpleAdjQuery = { } const prefixPhraseQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' } } ] @@ -126,16 +126,16 @@ const prefixPhraseQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "The Tragedy of Hamlet, Prince of Denmark", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' } } ] @@ -149,52 +149,52 @@ const prefixPhraseQuery = { } const simpleAnyQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "or" + type: 'cross_fields', + operator: 'or' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } } ] @@ -203,17 +203,17 @@ const simpleAnyQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } } ] @@ -227,114 +227,114 @@ const simpleAnyQuery = { } const anyWithPrefixQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "or" + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Tragedy', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Comedy', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "phrase_prefix" + type: 'phrase_prefix' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' } } ] @@ -343,31 +343,31 @@ const anyWithPrefixQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "or" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' } }, { - "multi_match": { - "query": "Tragedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' } }, { - "multi_match": { - "query": "Comedy", - "fields": [], - "type": "phrase_prefix" + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' } } ] @@ -381,52 +381,52 @@ const anyWithPrefixQuery = { } const simpleAllQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [ - "title", - "title.folded", - "titleAlt.folded", - "uniformTitle.folded", - "titleDisplay.folded", - "seriesStatement.folded", - "contentsTitle.folded", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelSeriesStatement.folded", - "parallelTitleAlt.folded", - "parallelCreatorLiteral.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle" + multi_match: { + query: 'Hamlet Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' ], - "type": "cross_fields", - "operator": "and" + type: 'cross_fields', + operator: 'and' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "and" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' } } ] @@ -435,17 +435,17 @@ const simpleAllQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet Othello", - "fields": [], - "type": "cross_fields", - "operator": "and" + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' } } ] @@ -459,65 +459,65 @@ const simpleAllQuery = { } const keywordQueryForBarcode = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "123456", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: '123456', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "123456", - "fields": [ - "items.idBarcode" + multi_match: { + query: '123456', + fields: [ + 'items.idBarcode' ], - "type": "phrase" + type: 'phrase' } } ] @@ -526,16 +526,16 @@ const keywordQueryForBarcode = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "123456", - "fields": [], - "type": "phrase" + multi_match: { + query: '123456', + fields: [], + type: 'phrase' } } ] @@ -549,65 +549,65 @@ const keywordQueryForBarcode = { } const keywordQueryForShelfMark = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: 'B 12', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [ - "items.shelfMark" + multi_match: { + query: 'B 12', + fields: [ + 'items.shelfMark' ], - "type": "phrase" + type: 'phrase' } } ] @@ -616,16 +616,16 @@ const keywordQueryForShelfMark = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "B 12", - "fields": [], - "type": "phrase" + multi_match: { + query: 'B 12', + fields: [], + type: 'phrase' } } ] @@ -639,63 +639,63 @@ const keywordQueryForShelfMark = { } const keywordQueryForGeneralTerm = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [ - "title", - "title.folded", - "description.foldedStemmed", - "subjectLiteral", - "subjectLiteral.folded", - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "note.label.foldedStemmed", - "publisherLiteral.folded", - "seriesStatement.folded", - "titleAlt.folded", - "titleDisplay.folded", - "contentsTitle.folded", - "tableOfContents.folded", - "genreForm", - "donor.folded", - "parallelTitle.folded", - "parallelTitleDisplay.folded", - "parallelTitleAlt.folded", - "parallelSeriesStatement.folded", - "parallelCreatorLiteral.folded", - "parallelPublisher", - "parallelPublisherLiteral", - "uniformTitle.folded", - "parallelUniformTitle", - "formerTitle", - "addedAuthorTitle", - "placeOfPublication.folded" + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -704,16 +704,16 @@ const keywordQueryForGeneralTerm = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Hamlet", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' } } ] @@ -727,49 +727,49 @@ const keywordQueryForGeneralTerm = { } const identifierQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "term": { - "uri": "b1234" + term: { + uri: 'b1234' } }, { - "term": { - "idIsbn.clean": "b1234" + term: { + 'idIsbn.clean': 'b1234' } }, { - "term": { - "idIssn.clean": "b1234" + term: { + 'idIssn.clean': 'b1234' } }, { - "prefix": { - "identifierV2.value": "b1234" + prefix: { + 'identifierV2.value': 'b1234' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "term": { - "items.idBarcode": "b1234" + term: { + 'items.idBarcode': 'b1234' } }, { - "prefix": { - "items.shelfMark.keywordLowercased": "b1234" + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' } } ] @@ -778,11 +778,11 @@ const identifierQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [] + nested: { + path: 'holdings', + query: { + bool: { + should: [] } } } @@ -793,42 +793,42 @@ const identifierQuery = { } const binaryBooleanQuery = { - "query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -837,16 +837,16 @@ const binaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -858,35 +858,35 @@ const binaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -895,16 +895,16 @@ const binaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -921,45 +921,45 @@ const binaryBooleanQuery = { } const ternaryBooleanQuery = { - "query": { - "bool": { - "should": [ + query: { + bool: { + should: [ { - "bool": { - "must": [ + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -968,16 +968,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -989,35 +989,35 @@ const ternaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1026,16 +1026,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1050,34 +1050,34 @@ const ternaryBooleanQuery = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [ - "genreForm.raw" + multi_match: { + query: 'tragedy', + fields: [ + 'genreForm.raw' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1086,16 +1086,16 @@ const ternaryBooleanQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1112,42 +1112,42 @@ const ternaryBooleanQuery = { } const queryWithParentheses = { - "query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1156,16 +1156,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1177,38 +1177,38 @@ const queryWithParentheses = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1217,16 +1217,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1238,34 +1238,34 @@ const queryWithParentheses = { } }, { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [ - "genreForm.raw" + multi_match: { + query: 'tragedy', + fields: [ + 'genreForm.raw' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1274,16 +1274,16 @@ const queryWithParentheses = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "tragedy", - "fields": [], - "type": "phrase" + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' } } ] @@ -1303,42 +1303,42 @@ const queryWithParentheses = { } const negationQuery = { - "query": { - "bool": { - "must": [ + query: { + bool: { + must: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [ - "creatorLiteral", - "creatorLiteral.folded", - "contributorLiteral.folded", - "parallelCreatorLiteral.folded", - "parallelContributorLiteral.folded" + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1347,16 +1347,16 @@ const negationQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "Shakespeare", - "fields": [], - "type": "phrase" + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } } ] @@ -1368,37 +1368,37 @@ const negationQuery = { } } ], - "must_not": [ + must_not: [ { - "bool": { - "should": [ + bool: { + should: [ { - "bool": { - "should": [ + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [ - "language.id", - "language.label" + multi_match: { + query: 'English', + fields: [ + 'language.id', + 'language.label' ], - "type": "phrase" + type: 'phrase' } } ] } }, { - "nested": { - "path": "items", - "query": { - "bool": { - "should": [ + nested: { + path: 'items', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1407,16 +1407,16 @@ const negationQuery = { } }, { - "nested": { - "path": "holdings", - "query": { - "bool": { - "should": [ + nested: { + path: 'holdings', + query: { + bool: { + should: [ { - "multi_match": { - "query": "English", - "fields": [], - "type": "phrase" + multi_match: { + query: 'English', + fields: [], + type: 'phrase' } } ] @@ -1432,7 +1432,7 @@ const negationQuery = { } } -module.exports = { +module.exports = { simpleAdjQuery, simpleAnyQuery, simpleAllQuery, From 271c692c34c36f7d9c93a6f1ebea52b98612073a Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 11:10:22 -0500 Subject: [PATCH 22/54] Add date queries --- lib/elasticsearch/cql/index-mapping.js | 2 +- lib/elasticsearch/cql_query_builder.js | 37 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index 758deddc..a73f1e55 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -69,7 +69,7 @@ const indexMapping = { fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] }, language: { fields: ['language.id', 'language.label'] }, - date: {}, + date: { fields: ['dates.range'] }, series: { fields: ['series', 'parallelSeries'] }, diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fc5643eb..e9985b95 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -176,7 +176,8 @@ function buildAtomicMain ({ fields, relation, terms, term }) { ...anyAllQueries({ fields, relation, terms, term }), ...adjEqQueries({ fields, relation, terms, term }), ...termQueriesForTermFields({ fields, relation, terms, term }), - ...prefixQueriesForPrefixFields({ fields, relation, terms, term }) + ...prefixQueriesForPrefixFields({ fields, relation, terms, term }), + ...dateQueries({ fields, relation, terms, term }) ] } } @@ -201,6 +202,40 @@ function adjEqQueries ({ fields, relation, terms, term }) { ].filter(q => q) } +function dateQueries ({ fields, relation, terms, term }) { + if (!fields.some(field => field.includes('date'))) { return [] } + let range + switch (relation) { + case "<": + range = { lt: terms[0] } + case ">": + range = { gt: terms[0] } + case ">=": + range = { gte: terms[0] } + case "<=": + range = { lte: terms[0] } + case "encloses": + range = { gt: terms[0], lt: terms[1] } + case "within": + range = { gte: terms[0], lte: terms[1] } + default: + break + } + + return [ + { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range + } + } + } + } + ] +} + function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { if (!fields.prefix) return [] return fields.prefix.map(field => prefixQuery(field, term)) From ffe585f221da4585f801de2d00baabfd8b171153 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 11:49:09 -0500 Subject: [PATCH 23/54] Add filters to cql query builder --- lib/elasticsearch/cql_query_builder.js | 26 +++++++++++++++++----- lib/elasticsearch/elastic-query-builder.js | 2 ++ lib/resources.js | 5 +++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index e9985b95..7bf98705 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,13 +1,29 @@ const { parseWithRightCql } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') +const ElasticQueryBuilder = require('./elastic-query-builder') -function buildEsQuery (cqlQuery) { +function buildEsQuery (cqlQuery, request) { + const filterQuery = filterQuery(request) return { - query: - buildEsQueryFromTree( - parseWithRightCql(cqlQuery) - ) + query: { + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery) + ) + ] + }, + ...filterQuery + } + } +} + +function filterQuery (request) { + const queryJson = ElasticQueryBuilder.forApiRequest(request).query.toJson() + if (queryJson.bool && queryJson.bool.filter) { + return { filter: queryJson.bool.filter } } + return {} } /** diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index d561795a..28336c9d 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -36,6 +36,8 @@ class ElasticQueryBuilder { case 'callnumber': this.buildCallnumberQuery() break + case 'cql': + break case 'all': default: this.buildAllQuery() diff --git a/lib/resources.js b/lib/resources.js index d6a8ec3c..5ea637e7 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -888,11 +888,12 @@ const buildElasticBody = function (params) { * @return {object} ES query object suitable to be POST'd to ES endpoint */ const buildElasticQuery = function (params) { + const request = ApiRequest.fromParams(params) if (params.search_scope === 'cql') { - const query = cqlQueryBuilder.buildEsQuery(params.q) + const query = cqlQueryBuilder.buildEsQuery(params.q, request) return query } - const request = ApiRequest.fromParams(params) + console.log('request: ', request) const builder = ElasticQueryBuilder.forApiRequest(request) return builder.query.toJson() From 1473e3d874cb2b7f06e53c57446a6e6bb071199a Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Feb 2026 14:52:27 -0500 Subject: [PATCH 24/54] Fix some small errors --- lib/elasticsearch/cql_query_builder.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 7bf98705..cc5c36ef 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -3,22 +3,20 @@ const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') function buildEsQuery (cqlQuery, request) { - const filterQuery = filterQuery(request) + const filterQuery = buildFilterQuery(request) return { - query: { bool: { should: [ buildEsQueryFromTree( - parseWithRightCql(cqlQuery) + parseWithRightCql(cqlQuery.trim()) ) ] }, ...filterQuery - } } } -function filterQuery (request) { +function buildFilterQuery (request) { const queryJson = ElasticQueryBuilder.forApiRequest(request).query.toJson() if (queryJson.bool && queryJson.bool.filter) { return { filter: queryJson.bool.filter } @@ -219,7 +217,7 @@ function adjEqQueries ({ fields, relation, terms, term }) { } function dateQueries ({ fields, relation, terms, term }) { - if (!fields.some(field => field.includes('date'))) { return [] } + if (!fields.fields.some(field => field.includes('date'))) { return [] } let range switch (relation) { case "<": From ed4c14a0306234029a82a5eb1d5d22da4a21159b Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 11:22:20 -0500 Subject: [PATCH 25/54] Add initial filter implementation for cql --- lib/elasticsearch/cql_query_builder.js | 37 +++++++++++++++----------- test/cql_query_builder.test.js | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index cc5c36ef..d3144b46 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -2,21 +2,22 @@ const { parseWithRightCql } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') -function buildEsQuery (cqlQuery, request) { +function buildEsQuery (cqlQuery, request = null) { const filterQuery = buildFilterQuery(request) return { - bool: { - should: [ - buildEsQueryFromTree( - parseWithRightCql(cqlQuery.trim()) - ) - ] - }, - ...filterQuery + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery.trim()) + ) + ] + }, + ...filterQuery } } function buildFilterQuery (request) { + if (!request) return {} const queryJson = ElasticQueryBuilder.forApiRequest(request).query.toJson() if (queryJson.bool && queryJson.bool.filter) { return { filter: queryJson.bool.filter } @@ -220,18 +221,24 @@ function dateQueries ({ fields, relation, terms, term }) { if (!fields.fields.some(field => field.includes('date'))) { return [] } let range switch (relation) { - case "<": + case '<': range = { lt: terms[0] } - case ">": + break + case '>': range = { gt: terms[0] } - case ">=": + break + case '>=': range = { gte: terms[0] } - case "<=": + break + case '<=': range = { lte: terms[0] } - case "encloses": + break + case 'encloses': range = { gt: terms[0], lt: terms[1] } - case "within": + break + case 'within': range = { gte: terms[0], lte: terms[1] } + break default: break } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 0dfdc599..ace8a839 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -18,7 +18,7 @@ const { } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { - it('Simple = query', function () { + it.only('Simple = query', function () { expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery From 74f97a4aa5823ebcf03341c3fd8a994c43b4be49 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 11:46:27 -0500 Subject: [PATCH 26/54] Fix tests --- lib/elasticsearch/cql_query_builder.js | 2 +- test/cql_query_builder.test.js | 2 +- test/fixtures/cql_fixtures.js | 2231 ++++++++++++------------ 3 files changed, 1144 insertions(+), 1091 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index d3144b46..494eab8f 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -218,7 +218,7 @@ function adjEqQueries ({ fields, relation, terms, term }) { } function dateQueries ({ fields, relation, terms, term }) { - if (!fields.fields.some(field => field.includes('date'))) { return [] } + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return [] } let range switch (relation) { case '<': diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index ace8a839..0dfdc599 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -18,7 +18,7 @@ const { } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { - it.only('Simple = query', function () { + it('Simple = query', function () { expect(buildEsQuery('title="Hamlet"')) .to.deep.equal( simpleAdjQuery diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 3c702c1f..ec2a7cd7 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,324 +1,274 @@ const simpleAdjQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const prefixPhraseQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'The Tragedy of Hamlet, Prince of Denmark', + fields: [], + type: 'phrase_prefix' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const simpleAnyQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'or' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + } + ] + } + } } } - } + + ] } - ] - } + } + ] } } const anyWithPrefixQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'or' } @@ -326,1109 +276,1212 @@ const anyWithPrefixQuery = { { multi_match: { query: 'Tragedy', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } }, { multi_match: { query: 'Comedy', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'phrase_prefix' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + }, + { + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' + } + }, + { + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'or' + } + }, + { + multi_match: { + query: 'Tragedy', + fields: [], + type: 'phrase_prefix' + } + }, + { + multi_match: { + query: 'Comedy', + fields: [], + type: 'phrase_prefix' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const simpleAllQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'and' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet Othello', - fields: [], + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], type: 'cross_fields', operator: 'and' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet Othello', + fields: [], + type: 'cross_fields', + operator: 'and' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForBarcode = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: '123456', fields: [ - 'items.idBarcode' + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [ + 'items.idBarcode' + ], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForShelfMark = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'B 12', fields: [ - 'items.shelfMark' + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [ + 'items.shelfMark' + ], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const keywordQueryForGeneralTerm = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [ - 'title', - 'title.folded', - 'description.foldedStemmed', - 'subjectLiteral', - 'subjectLiteral.folded', - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'note.label.foldedStemmed', - 'publisherLiteral.folded', - 'seriesStatement.folded', - 'titleAlt.folded', - 'titleDisplay.folded', - 'contentsTitle.folded', - 'tableOfContents.folded', - 'genreForm', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelTitleAlt.folded', - 'parallelSeriesStatement.folded', - 'parallelCreatorLiteral.folded', - 'parallelPublisher', - 'parallelPublisherLiteral', - 'uniformTitle.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle', - 'placeOfPublication.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Hamlet', - fields: [], + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] } - ] + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [], + type: 'phrase' + } + } + ] + } + } } } - } + ] } - ] - } + } + ] } } const identifierQuery = { - query: { - bool: { - should: [ - { - bool: { - should: [ - { - term: { - uri: 'b1234' - } - }, - { - term: { - 'idIsbn.clean': 'b1234' - } - }, - { - term: { - 'idIssn.clean': 'b1234' - } - }, - { - prefix: { - 'identifierV2.value': 'b1234' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { term: { - 'items.idBarcode': 'b1234' + uri: 'b1234' + } + }, + { + term: { + 'idIsbn.clean': 'b1234' + } + }, + { + term: { + 'idIssn.clean': 'b1234' } }, { prefix: { - 'items.shelfMark.keywordLowercased': 'b1234' + 'identifierV2.value': 'b1234' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [] + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + term: { + 'items.idBarcode': 'b1234' + } + }, + { + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [] + } + } } } - } + ] } - ] - } + } + ] } } const binaryBooleanQuery = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } const ternaryBooleanQuery = { - query: { - bool: { - should: [ - { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } - } - } - } - ] - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [ - 'genreForm.raw' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ + }, { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } } } ] } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'tragedy', - fields: [], + fields: [ + 'genreForm.raw' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } const queryWithParentheses = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } } - } - ] + ] + } } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' } - ] - } + } + ] } } - }, - { - nested: { - path: 'holdings', - query: { + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } - } - } - } - ] - } - }, - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [ - 'genreForm.raw' - ], - type: 'phrase' + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + }, + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'tragedy', - fields: [], + fields: [ + 'genreForm.raw' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } - ] - } + } + ] } } const negationQuery = { - query: { - bool: { - must: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [ - 'creatorLiteral', - 'creatorLiteral.folded', - 'contributorLiteral.folded', - 'parallelCreatorLiteral.folded', - 'parallelContributorLiteral.folded' - ], - type: 'phrase' - } - } - ] - } - }, - { - nested: { - path: 'items', - query: { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'Shakespeare', - fields: [], + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], type: 'phrase' } } ] } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] - } - } - } - } - ] - } - } - ], - must_not: [ - { - bool: { - should: [ - { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] } - ] + } } } - } - }, - { - nested: { - path: 'holdings', - query: { + ] + } + } + ], + must_not: [ + { + bool: { + should: [ + { bool: { should: [ { multi_match: { query: 'English', - fields: [], + fields: [ + 'language.id', + 'language.label' + ], type: 'phrase' } } ] } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'English', + fields: [], + type: 'phrase' + } + } + ] + } + } + } } - } + ] } - ] - } + } + ] } - ] - } + } + ] } } From d5cee7fae647fe6e862371766ede7666098d5c9c Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 12:15:09 -0500 Subject: [PATCH 27/54] Add date and filter features to cql --- lib/elasticsearch/cql_query_builder.js | 6 +- test/cql_query_builder.test.js | 60 +++++- test/fixtures/cql_fixtures.js | 277 ++++++++++++++++++++++++- 3 files changed, 338 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 494eab8f..fe16e03e 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -10,9 +10,9 @@ function buildEsQuery (cqlQuery, request = null) { buildEsQueryFromTree( parseWithRightCql(cqlQuery.trim()) ) - ] - }, - ...filterQuery + ], + ...filterQuery + } } } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 0dfdc599..3139eda9 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -1,6 +1,7 @@ const { expect } = require('chai') const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const ApiRequest = require('../lib/api-request') const { simpleAdjQuery, simpleAnyQuery, @@ -14,7 +15,14 @@ const { binaryBooleanQuery, ternaryBooleanQuery, queryWithParentheses, - negationQuery + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -122,4 +130,54 @@ describe('CQL Query Builder', function () { negationQuery ) }) + + it('Date after query', function () { + expect(buildEsQuery('date > "1990"')) + .to.deep.equal( + dateAfterQuery + ) + }) + + it('Date after or on query', function () { + expect(buildEsQuery('date >= "1990"')) + .to.deep.equal( + dateAfterOrOnQuery + ) + }) + + it('Date before query', function () { + expect(buildEsQuery('date < "1990"')) + .to.deep.equal( + dateBeforeQuery + ) + }) + + it('Date dateBeforeOrOnQuery query', function () { + expect(buildEsQuery('date <= "1990"')) + .to.deep.equal( + dateBeforeOrOnQuery + ) + }) + + it('Date within query', function () { + expect(buildEsQuery('date within "1990 2000"')) + .to.deep.equal( + dateWithinQuery + ) + }) + + it('Date encloses query', function () { + expect(buildEsQuery('date encloses "1990 2000"')) + .to.deep.equal( + dateEnclosesQuery + ) + }) + + it('Query with applied filters', function () { + const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 'cql' }) + expect(buildEsQuery('author="Shakespeare"', apiRequest)) + .to.deep.equal( + filterQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index ec2a7cd7..9c59d2d2 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1485,6 +1485,274 @@ const negationQuery = { } } +const dateAfterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateBeforeQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateBeforeOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateAfterOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateWithinQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } + } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const dateEnclosesQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } + } + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const filterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + }, + { + nested: { + path: 'holdings', + query: { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [], + type: 'phrase' + } + } + ] + } + } + } + } + ] + } + } + ], + filter: [ + { + bool: { + should: [ + { term: { 'language.id': 'Klingon' } }, + { term: { 'language.label': 'Klingon' } } + ] + } + } + ] + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1498,5 +1766,12 @@ module.exports = { binaryBooleanQuery, ternaryBooleanQuery, queryWithParentheses, - negationQuery + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery } From e2e43d00f09c5f180a898083a86a46cfd2d41b51 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 13:32:43 -0500 Subject: [PATCH 28/54] Add some more useful display of parsing and errors --- lib/elasticsearch/cql_grammar.js | 36 +++++++++++++++++++++++++++++++- lib/resources.js | 20 +++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9d7700bb..011dfa97 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -90,4 +90,38 @@ function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql } +function display (ast) { + if (!ast.type.includes('query')) { + return ast.text + } + + const childTypes = [ + 'atomic_query', 'sub_query', 'query', 'connective', + 'scope', 'relation', 'quoted_term' + ] + + const children = ast.children + .filter(child => childTypes.includes(child.type)) + .map(child => display(child)) + + if (children.length === 1) { + return children[0] + } + + return children +} + +function displayParsed (string) { + const parsed = rightCqlParser.getAST(reverseString(string)) + if (!parsed) return {} + if (parsed.errors.length) { + return { + error: parsed.errors.map(error => + `Parsing error likely near end of "${reverseString(error.token.rest)}"` + ).join("\n") + } + } + return { parsed: display(reverseAST(parsed)) } +} + +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/resources.js b/lib/resources.js index 5ea637e7..83dd2421 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -20,6 +20,7 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') +const { displayParsed } = require('./elasticsearch/cql_grammar') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -644,6 +645,22 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) + let parsed = {} + + if (params.search_scope === 'cql') { + try { + parsed = displayParsed(params.q) //? + } catch (e) { + throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') + } + if (parsed.error) { + throw new IndexSearchError(parsed.error) + } + if (!parsed.parsed) { + throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') + } + } + let body = buildElasticBody(params) // Strip unnecessary _source fields @@ -682,7 +699,8 @@ module.exports = function (app, _private = null) { resp.debug = { relevanceReport, - query: body + query: body, + ...parsed } return resp }) From 93c042e734c4d35c374e3e1dd89604c6b3ecae58 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Feb 2026 15:52:16 -0500 Subject: [PATCH 29/54] Fix linting --- lib/elasticsearch/cql_grammar.js | 2 +- lib/resources.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 011dfa97..9f9d7b14 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -118,7 +118,7 @@ function displayParsed (string) { return { error: parsed.errors.map(error => `Parsing error likely near end of "${reverseString(error.token.rest)}"` - ).join("\n") + ).join('\n') } } return { parsed: display(reverseAST(parsed)) } diff --git a/lib/resources.js b/lib/resources.js index 83dd2421..3d7ccf7f 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -649,7 +649,7 @@ module.exports = function (app, _private = null) { if (params.search_scope === 'cql') { try { - parsed = displayParsed(params.q) //? + parsed = displayParsed(params.q) // ? } catch (e) { throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') } From 0ebe3bd4b465aae525459334d3fe11b923fb657a Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 2 Mar 2026 16:19:56 -0500 Subject: [PATCH 30/54] Add new strategy for handling keyword vs text fields --- lib/elasticsearch/cql/index-mapping.js | 74 +- lib/elasticsearch/cql/mapping-from-es.json | 1120 +++++++++++++++++ lib/elasticsearch/cql_query_builder.js | 116 +- test/cql_query_builder.test.js | 10 +- test/fixtures/cql_fixtures.js | 1266 +++++++------------- 5 files changed, 1686 insertions(+), 900 deletions(-) create mode 100644 lib/elasticsearch/cql/mapping-from-es.json diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js index a73f1e55..0befbc87 100644 --- a/lib/elasticsearch/cql/index-mapping.js +++ b/lib/elasticsearch/cql/index-mapping.js @@ -30,9 +30,39 @@ const indexMapping = { 'formerTitle', 'addedAuthorTitle', 'placeOfPublication.folded', - { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) }, // Try to detect shelfmark searches (e.g. JFD 16-5143) { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + // missing description + 'subjectLiteral.raw', + 'creatorLiteral.keywordLowercased', + 'contributorLiteral.keywordLowercased', + // note.label is missing + 'publisherLiteral.raw', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'genreForm.raw', + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + // parallelPublisher/parallelPublisherLiteral missing + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication', + { field: 'items.shelfMark.raw', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + term: [ + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) } ] }, title: { @@ -53,30 +83,54 @@ const indexMapping = { 'parallelUniformTitle', 'formerTitle', 'addedAuthorTitle' + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication' ] }, author: { - fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'] + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'], + exact_fields: [ + 'creatorLiteral.keywordLowercased', 'contributorLiteral.keywordLowercased', + 'parallelCreatorLiteral.raw', 'parallelContributorLiteral.raw' + ] }, callnumber: { - fields: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + term: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] }, identifier: { prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] }, subject: { - fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'] + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'], + exact_fields: ['subjectLiteral.raw'] }, - language: { fields: ['language.id', 'language.label'] }, + language: { term: ['language.id', 'language.label'] }, date: { fields: ['dates.range'] }, series: { - fields: ['series', 'parallelSeries'] + term: ['series', 'parallelSeries'] }, - genre: { fields: ['genreForm.raw'] }, - center: { fields: ['buildingLocationIds'] }, - division: { fields: ['collectionIds'] }, - format: { fields: ['formatId'] } + genre: { fields: ['genreForm'], exact_fields: ['genreForm.raw'] }, + center: { term: ['buildingLocationIds'] }, + division: { term: ['collectionIds'] }, + format: { term: ['formatId'] } } module.exports = { diff --git a/lib/elasticsearch/cql/mapping-from-es.json b/lib/elasticsearch/cql/mapping-from-es.json new file mode 100644 index 00000000..614a62e3 --- /dev/null +++ b/lib/elasticsearch/cql/mapping-from-es.json @@ -0,0 +1,1120 @@ +{ + "resources-2025-07-07": { + "mappings": { + "dynamic": "strict", + "properties": { + "addedAuthorTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "buildingLocationIds": { + "type": "keyword", + "eager_global_ordinals": true + }, + "carrierType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "carrierType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "collectionIds": { + "type": "keyword", + "eager_global_ordinals": true + }, + "contentsTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "contributions": { + "type": "keyword" + }, + "contributorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "contributorLiteralNormalized": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "contributorLiteralWithoutDates": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "contributorNameRoleTest": { + "type": "keyword" + }, + "contributor_sort": { + "type": "keyword" + }, + "contributors": { + "properties": { + "label": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "nameRole": { + "type": "keyword" + } + } + }, + "contributorsPacked": { + "type": "keyword" + }, + "contributorsTest": { + "properties": { + "label": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "nameRole": { + "type": "keyword" + }, + "title": { + "type": "keyword" + } + } + }, + "created": { + "type": "date", + "index": false + }, + "createdDecade": { + "type": "short" + }, + "createdString": { + "type": "keyword" + }, + "createdYear": { + "type": "short" + }, + "creatorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "creatorLiteralNormalized": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "creatorLiteralWithoutDates": { + "type": "keyword", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "lowercase_normalizer" + } + } + }, + "creator_sort": { + "type": "keyword" + }, + "dateEndDecade": { + "type": "short" + }, + "dateEndString": { + "type": "keyword" + }, + "dateEndYear": { + "type": "short" + }, + "dateStartDecade": { + "type": "short" + }, + "dateStartString": { + "type": "keyword" + }, + "dateStartYear": { + "type": "short" + }, + "dateString": { + "type": "keyword" + }, + "dates": { + "type": "nested", + "properties": { + "range": { + "type": "date_range" + }, + "raw": { + "type": "keyword" + }, + "tag": { + "type": "keyword" + } + } + }, + "depiction": { + "type": "keyword" + }, + "description": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "dimensions": { + "type": "keyword" + }, + "donor": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "editionStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "electronicResources": { + "properties": { + "label": { + "type": "keyword" + }, + "url": { + "type": "keyword" + } + } + }, + "extent": { + "type": "keyword" + }, + "formatId": { + "type": "keyword" + }, + "formerTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "genreForm": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "holdings": { + "type": "nested", + "properties": { + "checkInBoxes": { + "type": "nested", + "properties": { + "copies": { + "type": "short" + }, + "coverage": { + "type": "keyword" + }, + "position": { + "type": "short" + }, + "shelfMark": { + "type": "keyword", + "index": false + }, + "status": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } + }, + "format": { + "type": "keyword" + }, + "holdingStatement": { + "type": "keyword" + }, + "identifier": { + "properties": { + "identifierStatus": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "location": { + "properties": { + "code": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "notes": { + "type": "keyword" + }, + "physicalLocation": { + "type": "keyword", + "index": false + }, + "shelfMark": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + } + } + }, + "idIsbn": { + "type": "keyword", + "fields": { + "clean": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "identifier_normalizer" + } + } + }, + "idIsbn_clean": { + "type": "keyword" + }, + "idIssn": { + "type": "keyword", + "fields": { + "clean": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "identifier_normalizer" + } + } + }, + "idLcc": { + "type": "keyword" + }, + "idLccSort": { + "type": "keyword" + }, + "idLccn": { + "type": "keyword", + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "idOclc": { + "type": "keyword" + }, + "idOwi": { + "type": "keyword" + }, + "identifier": { + "type": "keyword" + }, + "identifierV2": { + "properties": { + "identifierStatus": { + "type": "keyword", + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "issuance": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "issuance_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "items": { + "type": "nested", + "properties": { + "accessMessage": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "accessMessage_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "aeonUrl": { + "type": "keyword" + }, + "catalogItemType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "catalogItemType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "dateRange": { + "type": "date_range", + "format": "yyyy-MM-dd||yyyy-MM||yyyy" + }, + "dateRaw": { + "type": "text" + }, + "deliveryLocation": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "deliveryLocation_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "dueDate": { + "type": "date" + }, + "electronicLocator": { + "properties": { + "label": { + "type": "keyword", + "index": false + }, + "url": { + "type": "keyword" + } + } + }, + "enumerationChronology": { + "type": "keyword" + }, + "enumerationChronology_sort": { + "type": "keyword" + }, + "formatLiteral": { + "type": "keyword" + }, + "holdingLocation": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "holdingLocation_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "idBarcode": { + "type": "keyword" + }, + "identifier": { + "type": "keyword" + }, + "identifierV2": { + "properties": { + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "location": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "m2CustomerCode": { + "type": "keyword" + }, + "owner": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "owner_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "physicalLocation": { + "type": "keyword" + }, + "recapCustomerCode": { + "type": "keyword" + }, + "requestable": { + "type": "boolean" + }, + "shelfMark": { + "type": "text", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "shelfmark_normalizer" + }, + "raw": { + "type": "keyword" + } + } + }, + "shelfMark_sort": { + "type": "keyword" + }, + "status": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "status_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "type": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + }, + "volumeRange": { + "type": "integer_range" + }, + "volumeRaw": { + "type": "text" + } + } + }, + "language": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "language_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "lccClassification": { + "type": "keyword" + }, + "materialType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "materialType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "mediaType": { + "properties": { + "id": { + "type": "keyword" + }, + "label": { + "type": "keyword" + } + } + }, + "mediaType_packed": { + "type": "keyword", + "eager_global_ordinals": true + }, + "note": { + "properties": { + "label": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "noteType": { + "type": "keyword" + }, + "type": { + "type": "keyword", + "index": false + } + } + }, + "numAvailable": { + "type": "short" + }, + "numCheckinCardItems": { + "type": "short" + }, + "numElectronicResources": { + "type": "short" + }, + "numItemDatesParsed": { + "type": "short" + }, + "numItemVolumesParsed": { + "type": "short" + }, + "numItems": { + "type": "short" + }, + "numItemsTotal": { + "type": "short" + }, + "nyplSource": { + "type": "keyword" + }, + "parallelAddedAuthorTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "parallelContributorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "parallelCreatorLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "parallelDescription": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "parallelDisplayField": { + "properties": { + "fieldName": { + "type": "keyword" + }, + "index": { + "type": "short" + }, + "value": { + "type": "text" + } + } + }, + "parallelEditionStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "parallelNote": { + "properties": { + "label": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "noteType": { + "type": "keyword" + }, + "type": { + "type": "keyword", + "index": false + } + } + }, + "parallelPlaceOfPublication": { + "type": "keyword", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelPublicationStatement": { + "type": "keyword", + "index": false + }, + "parallelPublisher": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelPublisherLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelSeries": { + "type": "keyword" + }, + "parallelSeriesStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "parallelSubjectLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelSummary": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "parallelTableOfContents": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelTitleAlt": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "parallelTitleDisplay": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "parallelUniformTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "partOf": { + "type": "keyword" + }, + "physicalDescription": { + "type": "keyword", + "index": false + }, + "placeOfPublication": { + "type": "keyword", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "popularity": { + "type": "short" + }, + "publicDomain": { + "type": "boolean" + }, + "publicationStatement": { + "type": "keyword", + "index": false + }, + "publisherLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "recordType": { + "type": "keyword" + }, + "recordTypeId": { + "type": "keyword" + }, + "serialPublicationDates": { + "type": "keyword", + "index": false + }, + "series": { + "type": "keyword" + }, + "seriesStatement": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "shelfMark": { + "type": "text", + "fields": { + "keywordLowercased": { + "type": "keyword", + "normalizer": "shelfmark_normalizer" + }, + "raw": { + "type": "keyword" + } + } + }, + "subjectLiteral": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword", + "eager_global_ordinals": true + } + } + }, + "subjectLiteral_exploded": { + "type": "keyword" + }, + "summary": { + "type": "text", + "fields": { + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + } + } + }, + "supplementaryContent": { + "properties": { + "label": { + "type": "keyword", + "index": false + }, + "url": { + "type": "keyword" + } + } + }, + "suppressed": { + "type": "boolean" + }, + "tableOfContents": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "title": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "foldedStemmed": { + "type": "text", + "analyzer": "folding_stemming_analyzer" + }, + "keyword": { + "type": "keyword", + "ignore_above": 256 + }, + "keywordLowercased": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "lowercase_normalizer" + }, + "keywordLowercasedStripped": { + "type": "keyword", + "ignore_above": 256, + "normalizer": "punctuation_and_lowercase_normalizer" + }, + "shingle": { + "type": "text", + "analyzer": "shingles_analyzer" + } + } + }, + "titleAlt": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "titleDisplay": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + } + } + }, + "title_sort": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "uniformTitle": { + "type": "text", + "fields": { + "folded": { + "type": "text", + "analyzer": "folding_analyzer" + }, + "raw": { + "type": "keyword" + } + } + }, + "updatedAt": { + "type": "date" + }, + "uri": { + "type": "keyword" + }, + "uris": { + "type": "keyword" + } + } + } + } +} diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fe16e03e..8555738f 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -185,40 +185,64 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { + console.log('building atomic main ', relation, terms, term) + return anyAllQueries({ fields, relation, terms }) || + adjEqQueries({ fields, relation, terms, term }) || + dateQueries({ fields, relation, terms, term }) +} + +function anyAllQueries ({ fields, relation, terms }) { + if (!['any', 'all'].includes(relation)) { return null } + const operator = (relation === 'any' ? 'should' : 'must') return { bool: { - should: [ - ...anyAllQueries({ fields, relation, terms, term }), - ...adjEqQueries({ fields, relation, terms, term }), - ...termQueriesForTermFields({ fields, relation, terms, term }), - ...prefixQueriesForPrefixFields({ fields, relation, terms, term }), - ...dateQueries({ fields, relation, terms, term }) - ] + [operator]: terms.map(term => matchTermWithFields(fields, term, 'cross_fields')) } } } -function anyAllQueries ({ fields, relation, terms, term }) { - if (!['any', 'all'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - multiMatch(fieldsToUse, relation, terms.filter(term => !term.startsWith('^'))), - ...(terms.filter(term => term.startsWith('^')).map(term => phrasePrefixQuery(fieldsToUse, term.slice(1)))) - ].filter(q => q) +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', '==', 'adj'].includes(relation)) { return null } + const type = (relation === '==') ? 'exact' : 'phrase' + return matchTermWithFields(fields, term, type) } -function adjEqQueries ({ fields, relation, terms, term }) { - if (!['=', 'adj'].includes(relation)) { return [] } - const fieldsToUse = fields.fields - return [ - term.startsWith('^') - ? phrasePrefixQuery(fieldsToUse, term.slice(1)) - : phraseQuery(fieldsToUse, term) - ].filter(q => q) +const table = { + exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, + prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, + basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } +} + +const selectFields = (queryType, fields) => (selector) => { + return Object.entries(fields) + .filter(([fieldType, fieldNames]) => { + return table[queryType][fieldType] === selector + }) + .map(([fieldType, fieldNames]) => fieldNames) + .flat() +} + +function matchTermWithFields (fields, term, type) { + const queryType = term[0] === '^' ? 'prefix' : (type === 'exact' ? 'exact' : 'basic') + if (term[0] === '^') term = term.slice(1) + + const selector = selectFields(queryType, fields) + + const queries = [ + ...multiMatch(selector('multi_match'), term, type), + ...(selector('term').map(termField => termQuery(termField, term))), + ...(selector('prefix').map(prefixField => prefixQuery(prefixField, term))) + ] + + return { + bool: { + should: queries + } + } } function dateQueries ({ fields, relation, terms, term }) { - if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return [] } + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return null } let range switch (relation) { case '<': @@ -257,16 +281,6 @@ function dateQueries ({ fields, relation, terms, term }) { ] } -function prefixQueriesForPrefixFields ({ fields, relation, terms, term }) { - if (!fields.prefix) return [] - return fields.prefix.map(field => prefixQuery(field, term)) -} - -function termQueriesForTermFields ({ fields, relation, terms, term }) { - if (!fields.term) return [] - return fields.term.map(field => termQuery(field, term)) -} - function termQuery (field, term) { return { term: { [field]: term } } } @@ -275,38 +289,16 @@ function prefixQuery (field, term) { return { prefix: { [field]: term } } } -function multiMatch (fields, relation, terms) { - if (!fields) return - return { - multi_match: { - query: terms.join(' '), - fields, - type: 'cross_fields', - operator: relation === 'any' ? 'or' : 'and' - } - } -} - -function phrasePrefixQuery (fields, term) { - if (!fields) return - return { - multi_match: { - query: term, - fields, - type: 'phrase_prefix' - } - } -} +function multiMatch (fields, term, type) { + if (!fields || !fields.length) return [] -function phraseQuery (fields, term) { - if (!fields) return - return { + return [{ multi_match: { query: term, fields, - type: 'phrase' + type } - } + }] } module.exports = { @@ -315,5 +307,7 @@ module.exports = { buildBoolean, buildAtomic, buildAtomicMain, - nestedMapAndFilter + nestedMapAndFilter, + selectFields, + indexMapping } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 3139eda9..cd55ec4a 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -22,7 +22,8 @@ const { dateAfterOrOnQuery, dateWithinQuery, dateEnclosesQuery, - filterQuery + filterQuery, + multiAdjQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -40,6 +41,13 @@ describe('CQL Query Builder', function () { ) }) + it('Multi-word adj query', function () { + expect(buildEsQuery('title adj "Hamlet, Prince"')) + .to.deep.equal( + multiAdjQuery + ) + }) + it('Simple any query', function () { expect(buildEsQuery('title any "Hamlet Othello"')) .to.deep.equal( diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 9c59d2d2..0ca5bae9 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -35,40 +35,10 @@ const simpleAdjQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -77,7 +47,7 @@ const simpleAdjQuery = { } } -const prefixPhraseQuery = { +const multiAdjQuery = { bool: { should: [ { @@ -88,7 +58,7 @@ const prefixPhraseQuery = { should: [ { multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', + query: 'Hamlet, Prince', fields: [ 'title', 'title.folded', @@ -107,47 +77,97 @@ const prefixPhraseQuery = { 'formerTitle', 'addedAuthorTitle' ], - type: 'phrase_prefix' + type: 'phrase' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'The Tragedy of Hamlet, Prince of Denmark', - fields: [], - type: 'phrase_prefix' - } - } - ] + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + +const prefixPhraseQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'seriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'titleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'donor.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelTitleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelSeriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelCreatorLiteral.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'uniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelUniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'addedAuthorTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + placeOfPublication: 'The Tragedy of Hamlet, Prince of Denmark' + } } - } + ] } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -166,28 +186,63 @@ const simpleAnyQuery = { bool: { should: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -199,14 +254,8 @@ const simpleAnyQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - } + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -218,20 +267,13 @@ const simpleAnyQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - } + { bool: { should: [] } }, + { bool: { should: [] } } ] } } } } - ] } } @@ -249,76 +291,127 @@ const anyWithPrefixQuery = { bool: { should: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'or' + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Tragedy' + } + }, + { + prefix: { 'seriesStatement.raw': 'Tragedy' } + }, + { prefix: { 'titleAlt.raw': 'Tragedy' } }, + { prefix: { 'donor.raw': 'Tragedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Tragedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Tragedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Tragedy' } + }, + { prefix: { 'uniformTitle.raw': 'Tragedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Tragedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Tragedy' } + }, + { prefix: { placeOfPublication: 'Tragedy' } } + ] } }, { - multi_match: { - query: 'Tragedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Comedy' + } + }, + { prefix: { 'seriesStatement.raw': 'Comedy' } }, + { prefix: { 'titleAlt.raw': 'Comedy' } }, + { prefix: { 'donor.raw': 'Comedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Comedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Comedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Comedy' } + }, + { prefix: { 'uniformTitle.raw': 'Comedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Comedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Comedy' } + }, + { prefix: { placeOfPublication: 'Comedy' } } + ] } }, { - multi_match: { - query: 'Comedy', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'phrase_prefix' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -330,28 +423,10 @@ const anyWithPrefixQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } - } + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -363,28 +438,10 @@ const anyWithPrefixQuery = { query: { bool: { should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'or' - } - }, - { - multi_match: { - query: 'Tragedy', - fields: [], - type: 'phrase_prefix' - } - }, - { - multi_match: { - query: 'Comedy', - fields: [], - type: 'phrase_prefix' - } - } + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -405,30 +462,65 @@ const simpleAllQuery = { should: [ { bool: { - should: [ + must: [ { - multi_match: { - query: 'Hamlet Othello', - fields: [ - 'title', - 'title.folded', - 'titleAlt.folded', - 'uniformTitle.folded', - 'titleDisplay.folded', - 'seriesStatement.folded', - 'contentsTitle.folded', - 'donor.folded', - 'parallelTitle.folded', - 'parallelTitleDisplay.folded', - 'parallelSeriesStatement.folded', - 'parallelTitleAlt.folded', - 'parallelCreatorLiteral.folded', - 'parallelUniformTitle', - 'formerTitle', - 'addedAuthorTitle' - ], - type: 'cross_fields', - operator: 'and' + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] } } ] @@ -439,15 +531,9 @@ const simpleAllQuery = { path: 'items', query: { bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } - } + must: [ + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -458,15 +544,9 @@ const simpleAllQuery = { path: 'holdings', query: { bool: { - should: [ - { - multi_match: { - query: 'Hamlet Othello', - fields: [], - type: 'cross_fields', - operator: 'and' - } - } + must: [ + { bool: { should: [] } }, + { bool: { should: [] } } ] } } @@ -533,38 +613,13 @@ const keywordQueryForBarcode = { path: 'items', query: { bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [ - 'items.idBarcode' - ], - type: 'phrase' - } - } - ] + should: [{ term: { 'items.idBarcode': '123456' } }] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: '123456', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -631,9 +686,7 @@ const keywordQueryForShelfMark = { { multi_match: { query: 'B 12', - fields: [ - 'items.shelfMark' - ], + fields: ['items.shelfMark'], type: 'phrase' } } @@ -643,22 +696,7 @@ const keywordQueryForShelfMark = { } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'B 12', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -713,44 +751,14 @@ const keywordQueryForGeneralTerm = { type: 'phrase' } } - ] - } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } + ] } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Hamlet', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -768,26 +776,10 @@ const identifierQuery = { { bool: { should: [ - { - term: { - uri: 'b1234' - } - }, - { - term: { - 'idIsbn.clean': 'b1234' - } - }, - { - term: { - 'idIssn.clean': 'b1234' - } - }, - { - prefix: { - 'identifierV2.value': 'b1234' - } - } + { term: { uri: 'b1234' } }, + { term: { 'idIsbn.clean': 'b1234' } }, + { term: { 'idIssn.clean': 'b1234' } }, + { prefix: { 'identifierV2.value': 'b1234' } } ] } }, @@ -797,11 +789,7 @@ const identifierQuery = { query: { bool: { should: [ - { - term: { - 'items.idBarcode': 'b1234' - } - }, + { term: { 'items.idBarcode': 'b1234' } }, { prefix: { 'items.shelfMark.keywordLowercased': 'b1234' @@ -813,14 +801,7 @@ const identifierQuery = { } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -858,39 +839,12 @@ const binaryBooleanQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -902,53 +856,18 @@ const binaryBooleanQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -995,37 +914,13 @@ const ternaryBooleanQuery = { { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1037,53 +932,21 @@ const ternaryBooleanQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1101,9 +964,7 @@ const ternaryBooleanQuery = { { multi_match: { query: 'tragedy', - fields: [ - 'genreForm.raw' - ], + fields: ['genreForm'], type: 'phrase' } } @@ -1111,39 +972,12 @@ const ternaryBooleanQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1185,39 +1019,12 @@ const queryWithParentheses = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1232,53 +1039,21 @@ const queryWithParentheses = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1293,9 +1068,7 @@ const queryWithParentheses = { { multi_match: { query: 'tragedy', - fields: [ - 'genreForm.raw' - ], + fields: ['genreForm'], type: 'phrase' } } @@ -1305,37 +1078,13 @@ const queryWithParentheses = { { nested: { path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'tragedy', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1380,39 +1129,12 @@ const negationQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1426,53 +1148,18 @@ const negationQuery = { { bool: { should: [ - { - multi_match: { - query: 'English', - fields: [ - 'language.id', - 'language.label' - ], - type: 'phrase' - } - } + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } ] } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { nested: { path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'English', - fields: [], - type: 'phrase' - } - } - ] - } - } + query: { bool: { should: [] } } } } ] @@ -1491,24 +1178,16 @@ const dateAfterQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gt: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1522,24 +1201,16 @@ const dateBeforeQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lt: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1553,24 +1224,16 @@ const dateBeforeOrOnQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lte: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1584,24 +1247,16 @@ const dateAfterOrOnQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gte: '1990' } } } - } - } - ] + [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1615,26 +1270,18 @@ const dateWithinQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gte: '1990', lte: '2000' } } - } - } + [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } } - ] + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1648,26 +1295,18 @@ const dateEnclosesQuery = { { bool: { should: [ - { - bool: { - should: [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gt: '1990', lt: '2000' } } - } - } + [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } } - ] + } } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } - } + ], + { nested: { path: 'items', query: null } }, + { nested: { path: 'holdings', query: null } } ] } } @@ -1701,40 +1340,10 @@ const filterQuery = { } }, { - nested: { - path: 'items', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'items', query: { bool: { should: [] } } } }, { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { - multi_match: { - query: 'Shakespeare', - fields: [], - type: 'phrase' - } - } - ] - } - } - } + nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -1773,5 +1382,6 @@ module.exports = { dateAfterOrOnQuery, dateWithinQuery, dateEnclosesQuery, - filterQuery + filterQuery, + multiAdjQuery } From b2ebcabaf713206792c6d570a3166ada07d4e739 Mon Sep 17 00:00:00 2001 From: danamansana Date: Mon, 2 Mar 2026 16:25:19 -0500 Subject: [PATCH 31/54] Add exact match query --- lib/elasticsearch/cql_grammar.js | 2 +- lib/elasticsearch/cql_query_builder.js | 1 - test/cql_query_builder.test.js | 10 +++++- test/fixtures/cql_fixtures.js | 48 +++++++++++++++++++++++++- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9f9d7b14..65cda1fc 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -22,7 +22,7 @@ const leftCql = ` scope ::= scope_term whitespace | scope_term relation ::= relation_term whitespace | relation_term scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" - relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "=" | "==" | "within" | "encloses" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses" quoted_term ::= quote phrase quote phrase ::= phrase whitespace word | word whitespace ::= [#x20#x09#x0A#x0D]+ diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 8555738f..fa28f9e1 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -185,7 +185,6 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { - console.log('building atomic main ', relation, terms, term) return anyAllQueries({ fields, relation, terms }) || adjEqQueries({ fields, relation, terms, term }) || dateQueries({ fields, relation, terms, term }) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index cd55ec4a..cf856b5c 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -23,7 +23,8 @@ const { dateWithinQuery, dateEnclosesQuery, filterQuery, - multiAdjQuery + multiAdjQuery, + exactMatchQuery } = require('./fixtures/cql_fixtures') describe('CQL Query Builder', function () { @@ -188,4 +189,11 @@ describe('CQL Query Builder', function () { filterQuery ) }) + + it('Exact match query', function () { + expect(buildEsQuery('author == "William Shakespeare"')) + .to.deep.equal( + exactMatchQuery + ) + }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 0ca5bae9..555e0a45 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1362,6 +1362,51 @@ const filterQuery = { } } +const exactMatchQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + term: { + 'creatorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'contributorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'parallelCreatorLiteral.raw': 'William Shakespeare' + } + }, + { + term: { + 'parallelContributorLiteral.raw': 'William Shakespeare' + } + } + ] + } + }, + { + nested: { path: 'items', query: { bool: { should: [] } } } + }, + { + nested: { path: 'holdings', query: { bool: { should: [] } } } + } + ] + } + } + ] + } +} + module.exports = { simpleAdjQuery, simpleAnyQuery, @@ -1383,5 +1428,6 @@ module.exports = { dateWithinQuery, dateEnclosesQuery, filterQuery, - multiAdjQuery + multiAdjQuery, + exactMatchQuery } From 98321abe433a3481f8bac9dd84fa895e4c52801a Mon Sep 17 00:00:00 2001 From: Ian O'Connor Date: Tue, 10 Mar 2026 11:16:30 -0400 Subject: [PATCH 32/54] Update to latest prod resource index --- config/production.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/production.env b/config/production.env index cd7e9955..2635bc6a 100644 --- a/config/production.env +++ b/config/production.env @@ -1,5 +1,5 @@ ENCRYPTED_ELASTICSEARCH_URI=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJYwgZMGCSqGSIb3DQEHBqCBhTCBggIBADB9BgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDFWw8ECX9Pz81z0kvAIBEIBQGec9PCpwuvEgLH6imhqP6tx1fj8Vlf2ZipnUy06jzmpE262Qvk9LPAq7sIYPVkTCZctwilwcU9oC6yxasVoUlK87la77v03CeZsPIDwciFY= -ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHIwcAYJKoZIhvcNAQcGoGMwYQIBADBcBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDGw+4ALa5eFYzC4SWwIBEIAvOVuHGgflzmWLE6GBauPAiSQLmL4xaSfBFED+zyGnQ+jV0KS64aN45ZFR0xgaZS0= +ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHcwdQYJKoZIhvcNAQcGoGgwZgIBADBhBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDLHxxMobbmmkVc76dQIBEIA0aZV7enzAKvaDMt7lJik1Ps8J6wikj4dJTL6YzjIlpRh8Dsl7tYgpEgsTNVqA7JRhErUT9w== ENCRYPTED_ELASTICSEARCH_API_KEY=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJ4wgZsGCSqGSIb3DQEHBqCBjTCBigIBADCBhAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAyPOPaQCBbvKQhJoPQCARCAV2TlWlRh+xKnCegpprEQgfldZGcVW48RND0LVd/pQpVTJnRTtbCpP7damT7k8ziJVdWZ3jsfs5fw5YnKc/EIQ1M//DRUzOJL98ir5LTTxE7QhflKDtUY+Q== ENCRYPTED_SCSB_URL=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHwwegYJKoZIhvcNAQcGoG0wawIBADBmBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDKPFC8wFkVM5CyT6VQIBEIA5m4eLBkpChRA//ZNEWsRqIDGZmevb/thzI03a0NiAW6VfybSAYpFthh+bj/yAk1VEEBF6r1T4A2GP From 891e41d9e637cd80ce4c89651b5ea4f7b08353dd Mon Sep 17 00:00:00 2001 From: Ian O'Connor Date: Wed, 11 Mar 2026 16:49:50 -0400 Subject: [PATCH 33/54] Support for _displayPacked fields (contributors and creators) --- lib/contributors.js | 6 +++--- lib/display-field-unpacker.js | 25 ++++++++++++++++++++++ lib/elasticsearch/elastic-query-builder.js | 2 +- lib/response_massager.js | 4 ++++ test/display-field-unpacker.test.js | 22 +++++++++++++++++++ test/elastic-query-builder.test.js | 2 +- test/fixtures/packed-display-response.json | 18 ++++++++++++++++ 7 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 lib/display-field-unpacker.js create mode 100644 test/display-field-unpacker.test.js create mode 100644 test/fixtures/packed-display-response.json diff --git a/lib/contributors.js b/lib/contributors.js index 0e7321de..4ccf874c 100644 --- a/lib/contributors.js +++ b/lib/contributors.js @@ -100,21 +100,21 @@ module.exports = function (app, _private = null) { } /** - * Builds an aggregation query that checks the resource index for counts on the contributorRoleLiteral field for a list of contributors. + * Builds an aggregation query that checks the resource index for counts on the browseableContributorRole_packed field for a list of contributors. */ const buildElasticRoleCountQuery = function (contributorList) { return { size: 0, query: { terms: { - contributorRoleLiteral: contributorList + browseableContributorRole_packed: contributorList } }, aggs: { contributor_role: { terms: { script: { - source: 'def results = []; for (val in doc["contributorRoleLiteral"]) { int pos = val.indexOf("||"); if (pos != -1) { String name = val.substring(0, pos); if (params.targets.contains(name)) { results.add(val); } } } return results;', + source: 'def results = []; for (val in doc["browseableContributorRole_packed"]) { int pos = val.indexOf("||"); if (pos != -1) { String name = val.substring(0, pos); if (params.targets.contains(name)) { results.add(val); } } } return results;', params: { targets: contributorList } diff --git a/lib/display-field-unpacker.js b/lib/display-field-unpacker.js new file mode 100644 index 00000000..5ae5b26e --- /dev/null +++ b/lib/display-field-unpacker.js @@ -0,0 +1,25 @@ +const parseValueAndLabel = (delimitedString) => { + if (!delimitedString.includes('||')) { + return { value: delimitedString, display: null } + } + const [value, display] = delimitedString.split('||') + return { value, display } +} + +module.exports = (elasticSearchResponse) => { + elasticSearchResponse.hits.hits.forEach((bib) => { + // Contributors and creators are packed like so || where + // can have prefix, title, and roles. We'd like to unpack them in a friendly format for the frontend + // to display the full label and use the isolated name for link-building + Object.entries(bib._source).forEach(([key, value]) => { + if (key.endsWith('_displayPacked')) { + const fieldName = key.replace('_displayPacked', '') + bib._source[fieldName + 'Display'] = value.map((packedValue) => parseValueAndLabel(packedValue)) + delete bib._source[key] + } + }) + + return bib + }) + return elasticSearchResponse +} diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 70efa58f..0b4452bc 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -181,7 +181,7 @@ class ElasticQueryBuilder { * Concat contributor + role if role param is provided */ applyContributorRole () { - this.query.addMust(termMatch('contributorRoleLiteral', this.request.params.filters.contributorLiteral + '||' + this.request.params.role)) + this.query.addMust(termMatch('browseableContributorRole_packed', this.request.params.filters.contributorLiteral + '||' + this.request.params.role)) } /** diff --git a/lib/response_massager.js b/lib/response_massager.js index a1eb17ec..034580a5 100644 --- a/lib/response_massager.js +++ b/lib/response_massager.js @@ -1,6 +1,7 @@ const LocationLabelUpdater = require('./location_label_updater') const AvailabilityResolver = require('./availability_resolver.js') const parallelFieldsExtractor = require('./parallel-fields-extractor') +const displayFieldUnpacker = require('./display-field-unpacker') const { isAeonUrl, sortOnPropWithUndefinedLast } = require('../lib/util') const FulfillmentResolver = require('./fulfillment_resolver') const fixItemRequestability = require('./requestability_resolver') @@ -73,6 +74,9 @@ class ResponseMassager { // Rename parallel fields: response = parallelFieldsExtractor(response) + // Extract display values and labels from packed fields + response = displayFieldUnpacker(response) + // Update ES response with updated availability from SCSB: const updatedWithAvailability = (new AvailabilityResolver(response)) .responseWithUpdatedAvailability(options) diff --git a/test/display-field-unpacker.test.js b/test/display-field-unpacker.test.js new file mode 100644 index 00000000..3f768393 --- /dev/null +++ b/test/display-field-unpacker.test.js @@ -0,0 +1,22 @@ +const { expect } = require('chai') +const displayFieldsUnpacker = require('../lib/display-field-unpacker') +const packedDisplayBib = require('./fixtures/packed-display-response.json') + +describe('Display field unpacker', () => { + describe('When a bib has a packed display property', () => { + it('adds each of the items in that array as unpacked objects', () => { + const displayFieldsUnpacked = displayFieldsUnpacker(packedDisplayBib).hits.hits[0]._source + expect(Object.keys(displayFieldsUnpacked).length).to.equal(2) + expect(displayFieldsUnpacked).to.deep.equal({ + testDisplay: [ + { value: 'someValue', display: 'someDisplay' }, + { value: 'someValueB', display: 'someDisplayB' }, + { value: 'someValueC', display: null } + ], + testOtherDisplay: [ + { value: 'otherValue', display: 'otherDisplay' } + ] + }) + }) + }) +}) diff --git a/test/elastic-query-builder.test.js b/test/elastic-query-builder.test.js index 48c08440..062a9ec0 100644 --- a/test/elastic-query-builder.test.js +++ b/test/elastic-query-builder.test.js @@ -272,7 +272,7 @@ describe('ElasticQueryBuilder', () => { const inst = ElasticQueryBuilder.forApiRequest(request) expect(inst.query.toJson()).to.nested - .include({ 'bool.must[0].term.contributorRoleLiteral.value': 'Patinkin, Mandy||performer.' }) + .include({ 'bool.must[0].term.browseableContributorRole_packed.value': 'Patinkin, Mandy||performer.' }) }) }) diff --git a/test/fixtures/packed-display-response.json b/test/fixtures/packed-display-response.json new file mode 100644 index 00000000..3a304c50 --- /dev/null +++ b/test/fixtures/packed-display-response.json @@ -0,0 +1,18 @@ +{ + "hits": { + "hits": [ + { + "_source": { + "test_displayPacked": [ + "someValue||someDisplay", + "someValueB||someDisplayB", + "someValueC" + ], + "testOther_displayPacked": [ + "otherValue||otherDisplay" + ] + } + } + ] + } +} From 3dcbf335834f95f818cf3266b35cbc5cbff9faee Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Thu, 12 Mar 2026 10:20:01 -0400 Subject: [PATCH 34/54] refactor --- .github/workflows/test-and-deploy.yml | 133 ++++---------------------- 1 file changed, 20 insertions(+), 113 deletions(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index c9306ac6..a179ba4b 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -2,6 +2,11 @@ name: Unit Tests on: [push] +# Global environment variable based on the branch name +env: + ENV_TAG: ${{ github.ref_name }} + ECR_REPOSITORY: discovery-api + jobs: tests: runs-on: ubuntu-latest @@ -15,114 +20,15 @@ jobs: run: npm ci - name: Unit Tests run: npm test - integration-test-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: tests - if: github.ref == 'refs/heads/qa' - steps: - - uses: actions/checkout@v4 - - name: Set Node version - uses: actions/setup-node@v4 - with: - node-version-file: '.nvmrc' - - name: Install dependencies - run: npm ci - - name: Start service - run: ENV=qa npm start & - - name: Run tests - run: npm run test-integration - deploy-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa' - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa --service discovery-api-qa --force-new-deployment - deploy-qa2: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa2' - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa2-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa2 --service discovery-api-qa2 --force-new-deployment - deploy-production: + deploy: permissions: id-token: write contents: read runs-on: ubuntu-latest needs: tests - if: github.ref == 'refs/heads/production' + # Only run if it's one of our three deployment branches + if: contains(fromJSON('["qa", "qa2", "production"]'), github.ref_name) steps: - name: Checkout repo uses: actions/checkout@v3 @@ -138,22 +44,23 @@ jobs: uses: aws-actions/amazon-ecr-login@v1 - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "production-previous" --image-manifest "$MANIFEST"; fi + MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') + PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') + if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then + aws ecr put-image --repository-name ${{ env.ECR_REPOSITORY }} --image-tag "${{ env.ENV_TAG }}-previous" --image-manifest "$MANIFEST" + fi + - name: Build, tag, and push image to Amazon ECR env: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api IMAGE_TAG: ${{ github.sha }} run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:production-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:production-latest + docker build -t $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG . + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG + docker tag $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + - name: Force ECS Update run: | - aws ecs update-service --cluster discovery-api-production --service discovery-api-production --force-new-deployment + aws ecs update-service --cluster discovery-api-${{ env.ENV_TAG }} --service discovery-api-${{ env.ENV_TAG }} --force-new-deployment \ No newline at end of file From 643b8df9855904f87154e16f1793e2dde465973c Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Thu, 12 Mar 2026 14:16:29 -0400 Subject: [PATCH 35/54] fix filter --- lib/elasticsearch/config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index a7ae053d..bc9f9ec7 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -89,7 +89,7 @@ const FILTER_CONFIG = { mediaType: { operator: 'match', field: ['mediaType.id', 'mediaType.label'], repeatable: true }, carrierType: { operator: 'match', field: ['carrierType.id', 'carrierType.label'], repeatable: true }, publisher: { operator: 'match', field: ['publisherLiteral.raw'], repeatable: true }, - contributorLiteral: { operator: 'match', field: ['contributorLiteral.keywordLowercased', 'parallelContributor.raw', 'creatorLiteral.keywordLowercased', 'parallelCreatorLiteral.raw'], repeatable: true }, + contributorLiteral: { operator: 'match', field: ['contributorLiteral.keywordLowercased', 'parallelContributorLiteral.raw', 'creatorLiteral.keywordLowercased', 'parallelCreatorLiteral.raw'], repeatable: true }, creatorLiteral: { operator: 'match', field: ['creatorLiteral.raw', 'parallelCreatorLiteral.raw'], repeatable: true }, issuance: { operator: 'match', field: ['issuance.id', 'issuance.label'], repeatable: true }, createdYear: { operator: 'match', field: ['createdYear'], repeatable: true }, From d4b31bcbfdeb4321725a5ffd532aa08d71884274 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:30:51 -0400 Subject: [PATCH 36/54] Bump core to 2.39 --- config/production.env | 2 +- config/qa.env | 2 +- config/test.env | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/production.env b/config/production.env index cd7e9955..6fe4c6b2 100644 --- a/config/production.env +++ b/config/production.env @@ -10,7 +10,7 @@ NYPL_OAUTH_URL=https://isso.nypl.org/ ENCRYPTED_NYPL_OAUTH_ID=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGswaQYJKoZIhvcNAQcGoFwwWgIBADBVBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMLKVUQA58B6vprNcAIBEIAoaz0lI9EL2M9NyTuEwT8JDmPBt6aXfMiFs027DEuwsCN0wS0qWeFL1g== ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAIcwgYQGCSqGSIb3DQEHBqB3MHUCAQAwcAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAyWz91LOP2YP5fg0q0CARCAQ9inO9SV1M8R0Pkkx84r7UdwlU1FxfXvIjk/z6Qs81KBAVELhby2iD5LawQyDrR9tjhuMbotS6QnydwwMR/p8+qJXHI= -NYPL_CORE_VERSION=v2.35 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=info FEATURES=on-site-edd diff --git a/config/qa.env b/config/qa.env index 627e25b8..05a98812 100644 --- a/config/qa.env +++ b/config/qa.env @@ -12,7 +12,7 @@ NYPL_OAUTH_URL=https://isso.nypl.org/ ENCRYPTED_NYPL_OAUTH_ID=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGswaQYJKoZIhvcNAQcGoFwwWgIBADBVBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMLKVUQA58B6vprNcAIBEIAoaz0lI9EL2M9NyTuEwT8JDmPBt6aXfMiFs027DEuwsCN0wS0qWeFL1g== ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAIcwgYQGCSqGSIb3DQEHBqB3MHUCAQAwcAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAyWz91LOP2YP5fg0q0CARCAQ9inO9SV1M8R0Pkkx84r7UdwlU1FxfXvIjk/z6Qs81KBAVELhby2iD5LawQyDrR9tjhuMbotS6QnydwwMR/p8+qJXHI= -NYPL_CORE_VERSION=v2.35 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=debug FEATURES=on-site-edd diff --git a/config/test.env b/config/test.env index 3883094a..9267f131 100644 --- a/config/test.env +++ b/config/test.env @@ -10,7 +10,7 @@ NYPL_OAUTH_URL=http://oauth.example.com NYPL_OAUTH_ID=encrypted-nypl-oauth-id NYPL_OAUTH_SECRET=encrypted-nypl-oauth-id -NYPL_CORE_VERSION=v2.37 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=error FEATURES=on-site-edd From 220f3363ed7a111dcf32f2d7054a5ab251e71a13 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:31:06 -0400 Subject: [PATCH 37/54] Add collection access type to item holding location, plus test --- lib/jsonld_serializers.js | 18 ++++++++++++++++++ test/item-resource-serializer.test.js | 14 +++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/jsonld_serializers.js b/lib/jsonld_serializers.js index 31081f7e..300d42bd 100644 --- a/lib/jsonld_serializers.js +++ b/lib/jsonld_serializers.js @@ -374,10 +374,15 @@ class ItemResourceSerializer extends JsonLdItemSerializer { }) } + if (this.body.holdingLocation) { + stmts.holdingLocation = ItemResourceSerializer.getFormattedHoldingLocation(this.body.holdingLocation) + } + // Override default serialization of item.electronicLocator statements (full digital surrogates): if (this.body.electronicLocator) { stmts.electronicLocator = this.body.electronicLocator.map((link) => ResourceSerializer.formatElectronicResourceBlankNode(link, 'nypl:ElectronicLocation')) } + return stmts } @@ -386,6 +391,19 @@ class ItemResourceSerializer extends JsonLdItemSerializer { return (new ItemResourceSerializer(resp, options)).format() } + static getFormattedHoldingLocation (location) { + const loc = location[0] + const locationId = loc['@id'].split(':')[1] + // Add collection access type from corresponding Core location + const collectionAccessType = + nyplCore.sierraLocations()[locationId]?.collectionAccessType + return [{ + '@id': loc['@id'], + prefLabel: loc.prefLabel, + collectionAccessType + }] + } + // Given an item, returns item with an added `identifier` // of form 'urn:[sourceIdentifierPrefix]:[sourceIdentifier]' // e.g. diff --git a/test/item-resource-serializer.test.js b/test/item-resource-serializer.test.js index 57429231..3387c8c8 100644 --- a/test/item-resource-serializer.test.js +++ b/test/item-resource-serializer.test.js @@ -60,7 +60,19 @@ describe('ItemResourceSerializer', () => { expect(doc.idNyplSourceId['@value']).to.eq('9876543210') }) }) - + describe('getFormattedHoldingLocation', () => { + it('should return holding location with id, label, and collection access type', () => { + const locationEntity = ItemResourceSerializer.getFormattedHoldingLocation([ + { + '@id': 'loc:maff1', + prefLabel: 'Schwarzman Building - Dorot Jewish Division Reference Room 111' + } + ]) + expect(locationEntity[0].prefLabel).to.equal('Schwarzman Building - Dorot Jewish Division Reference Room 111') + expect(locationEntity[0]['@id']).to.equal('loc:maff1') + expect(locationEntity[0].collectionAccessType).to.equal('shelf') + }) + }) describe('addSourceIdentifier', () => { it('adds source identifier for NYPL', async () => { const item = { uri: 'i1234' } From c5d482d4d7cfdd94d5674d41a1c689a01a98b9c2 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 13 Mar 2026 13:08:25 -0400 Subject: [PATCH 38/54] Add shelfmark sorting for callnumber searches --- lib/resources.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/resources.js b/lib/resources.js index 462ae779..3c9cccf6 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -46,6 +46,10 @@ const SORT_FIELDS = { initialDirection: 'asc', field: 'creator_sort' }, + callnumber: { + initialDirection: 'asc', + field: 'shelfMark.keywordLowercased' + }, relevance: {} } From 36e630203776e16104c113d44da10e4cbad21420 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 13 Mar 2026 15:10:51 -0400 Subject: [PATCH 39/54] Fixes in response to PR comments --- lib/elasticsearch/config.js | 2 +- lib/elasticsearch/cql/mapping-from-es.json | 1120 -------------------- lib/elasticsearch/cql_grammar.js | 13 +- lib/elasticsearch/cql_query_builder.js | 52 +- lib/resources.js | 1 - 5 files changed, 49 insertions(+), 1139 deletions(-) delete mode 100644 lib/elasticsearch/cql/mapping-from-es.json diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index 494dbb15..806ed729 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -74,7 +74,7 @@ const SEARCH_SCOPES = { standard_number: { // We do custom field matching for this search-scope }, - cql: {} + cql: {} // see cql/index_mapping for this search scope } const FILTER_CONFIG = { diff --git a/lib/elasticsearch/cql/mapping-from-es.json b/lib/elasticsearch/cql/mapping-from-es.json deleted file mode 100644 index 614a62e3..00000000 --- a/lib/elasticsearch/cql/mapping-from-es.json +++ /dev/null @@ -1,1120 +0,0 @@ -{ - "resources-2025-07-07": { - "mappings": { - "dynamic": "strict", - "properties": { - "addedAuthorTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "buildingLocationIds": { - "type": "keyword", - "eager_global_ordinals": true - }, - "carrierType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "carrierType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "collectionIds": { - "type": "keyword", - "eager_global_ordinals": true - }, - "contentsTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "contributions": { - "type": "keyword" - }, - "contributorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "contributorLiteralNormalized": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "contributorLiteralWithoutDates": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "contributorNameRoleTest": { - "type": "keyword" - }, - "contributor_sort": { - "type": "keyword" - }, - "contributors": { - "properties": { - "label": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "nameRole": { - "type": "keyword" - } - } - }, - "contributorsPacked": { - "type": "keyword" - }, - "contributorsTest": { - "properties": { - "label": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "nameRole": { - "type": "keyword" - }, - "title": { - "type": "keyword" - } - } - }, - "created": { - "type": "date", - "index": false - }, - "createdDecade": { - "type": "short" - }, - "createdString": { - "type": "keyword" - }, - "createdYear": { - "type": "short" - }, - "creatorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "creatorLiteralNormalized": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "creatorLiteralWithoutDates": { - "type": "keyword", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "lowercase_normalizer" - } - } - }, - "creator_sort": { - "type": "keyword" - }, - "dateEndDecade": { - "type": "short" - }, - "dateEndString": { - "type": "keyword" - }, - "dateEndYear": { - "type": "short" - }, - "dateStartDecade": { - "type": "short" - }, - "dateStartString": { - "type": "keyword" - }, - "dateStartYear": { - "type": "short" - }, - "dateString": { - "type": "keyword" - }, - "dates": { - "type": "nested", - "properties": { - "range": { - "type": "date_range" - }, - "raw": { - "type": "keyword" - }, - "tag": { - "type": "keyword" - } - } - }, - "depiction": { - "type": "keyword" - }, - "description": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "dimensions": { - "type": "keyword" - }, - "donor": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "editionStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "electronicResources": { - "properties": { - "label": { - "type": "keyword" - }, - "url": { - "type": "keyword" - } - } - }, - "extent": { - "type": "keyword" - }, - "formatId": { - "type": "keyword" - }, - "formerTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "genreForm": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "holdings": { - "type": "nested", - "properties": { - "checkInBoxes": { - "type": "nested", - "properties": { - "copies": { - "type": "short" - }, - "coverage": { - "type": "keyword" - }, - "position": { - "type": "short" - }, - "shelfMark": { - "type": "keyword", - "index": false - }, - "status": { - "type": "keyword" - }, - "type": { - "type": "keyword" - } - } - }, - "format": { - "type": "keyword" - }, - "holdingStatement": { - "type": "keyword" - }, - "identifier": { - "properties": { - "identifierStatus": { - "type": "keyword" - }, - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "location": { - "properties": { - "code": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "notes": { - "type": "keyword" - }, - "physicalLocation": { - "type": "keyword", - "index": false - }, - "shelfMark": { - "type": "keyword" - }, - "uri": { - "type": "keyword" - } - } - }, - "idIsbn": { - "type": "keyword", - "fields": { - "clean": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "identifier_normalizer" - } - } - }, - "idIsbn_clean": { - "type": "keyword" - }, - "idIssn": { - "type": "keyword", - "fields": { - "clean": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "identifier_normalizer" - } - } - }, - "idLcc": { - "type": "keyword" - }, - "idLccSort": { - "type": "keyword" - }, - "idLccn": { - "type": "keyword", - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "idOclc": { - "type": "keyword" - }, - "idOwi": { - "type": "keyword" - }, - "identifier": { - "type": "keyword" - }, - "identifierV2": { - "properties": { - "identifierStatus": { - "type": "keyword", - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "issuance": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "issuance_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "items": { - "type": "nested", - "properties": { - "accessMessage": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "accessMessage_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "aeonUrl": { - "type": "keyword" - }, - "catalogItemType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "catalogItemType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "dateRange": { - "type": "date_range", - "format": "yyyy-MM-dd||yyyy-MM||yyyy" - }, - "dateRaw": { - "type": "text" - }, - "deliveryLocation": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "deliveryLocation_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "dueDate": { - "type": "date" - }, - "electronicLocator": { - "properties": { - "label": { - "type": "keyword", - "index": false - }, - "url": { - "type": "keyword" - } - } - }, - "enumerationChronology": { - "type": "keyword" - }, - "enumerationChronology_sort": { - "type": "keyword" - }, - "formatLiteral": { - "type": "keyword" - }, - "holdingLocation": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "holdingLocation_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "idBarcode": { - "type": "keyword" - }, - "identifier": { - "type": "keyword" - }, - "identifierV2": { - "properties": { - "type": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "location": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "m2CustomerCode": { - "type": "keyword" - }, - "owner": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "owner_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "physicalLocation": { - "type": "keyword" - }, - "recapCustomerCode": { - "type": "keyword" - }, - "requestable": { - "type": "boolean" - }, - "shelfMark": { - "type": "text", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "shelfmark_normalizer" - }, - "raw": { - "type": "keyword" - } - } - }, - "shelfMark_sort": { - "type": "keyword" - }, - "status": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "status_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "type": { - "type": "keyword" - }, - "uri": { - "type": "keyword" - }, - "volumeRange": { - "type": "integer_range" - }, - "volumeRaw": { - "type": "text" - } - } - }, - "language": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "language_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "lccClassification": { - "type": "keyword" - }, - "materialType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "materialType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "mediaType": { - "properties": { - "id": { - "type": "keyword" - }, - "label": { - "type": "keyword" - } - } - }, - "mediaType_packed": { - "type": "keyword", - "eager_global_ordinals": true - }, - "note": { - "properties": { - "label": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "noteType": { - "type": "keyword" - }, - "type": { - "type": "keyword", - "index": false - } - } - }, - "numAvailable": { - "type": "short" - }, - "numCheckinCardItems": { - "type": "short" - }, - "numElectronicResources": { - "type": "short" - }, - "numItemDatesParsed": { - "type": "short" - }, - "numItemVolumesParsed": { - "type": "short" - }, - "numItems": { - "type": "short" - }, - "numItemsTotal": { - "type": "short" - }, - "nyplSource": { - "type": "keyword" - }, - "parallelAddedAuthorTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "parallelContributorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelCreatorLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelDescription": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "parallelDisplayField": { - "properties": { - "fieldName": { - "type": "keyword" - }, - "index": { - "type": "short" - }, - "value": { - "type": "text" - } - } - }, - "parallelEditionStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "parallelNote": { - "properties": { - "label": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "noteType": { - "type": "keyword" - }, - "type": { - "type": "keyword", - "index": false - } - } - }, - "parallelPlaceOfPublication": { - "type": "keyword", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelPublicationStatement": { - "type": "keyword", - "index": false - }, - "parallelPublisher": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelPublisherLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelSeries": { - "type": "keyword" - }, - "parallelSeriesStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "parallelSubjectLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelSummary": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "parallelTableOfContents": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelTitleAlt": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "parallelTitleDisplay": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "parallelUniformTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "partOf": { - "type": "keyword" - }, - "physicalDescription": { - "type": "keyword", - "index": false - }, - "placeOfPublication": { - "type": "keyword", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "popularity": { - "type": "short" - }, - "publicDomain": { - "type": "boolean" - }, - "publicationStatement": { - "type": "keyword", - "index": false - }, - "publisherLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "recordType": { - "type": "keyword" - }, - "recordTypeId": { - "type": "keyword" - }, - "serialPublicationDates": { - "type": "keyword", - "index": false - }, - "series": { - "type": "keyword" - }, - "seriesStatement": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "shelfMark": { - "type": "text", - "fields": { - "keywordLowercased": { - "type": "keyword", - "normalizer": "shelfmark_normalizer" - }, - "raw": { - "type": "keyword" - } - } - }, - "subjectLiteral": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword", - "eager_global_ordinals": true - } - } - }, - "subjectLiteral_exploded": { - "type": "keyword" - }, - "summary": { - "type": "text", - "fields": { - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - } - } - }, - "supplementaryContent": { - "properties": { - "label": { - "type": "keyword", - "index": false - }, - "url": { - "type": "keyword" - } - } - }, - "suppressed": { - "type": "boolean" - }, - "tableOfContents": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "title": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "foldedStemmed": { - "type": "text", - "analyzer": "folding_stemming_analyzer" - }, - "keyword": { - "type": "keyword", - "ignore_above": 256 - }, - "keywordLowercased": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "lowercase_normalizer" - }, - "keywordLowercasedStripped": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "punctuation_and_lowercase_normalizer" - }, - "shingle": { - "type": "text", - "analyzer": "shingles_analyzer" - } - } - }, - "titleAlt": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "titleDisplay": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - } - } - }, - "title_sort": { - "type": "keyword" - }, - "type": { - "type": "keyword" - }, - "uniformTitle": { - "type": "text", - "fields": { - "folded": { - "type": "text", - "analyzer": "folding_analyzer" - }, - "raw": { - "type": "keyword" - } - } - }, - "updatedAt": { - "type": "date" - }, - "uri": { - "type": "keyword" - }, - "uris": { - "type": "keyword" - } - } - } - } -} diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 65cda1fc..87b8c042 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -75,6 +75,7 @@ function reverseString (string) { } function reverseAST (tree) { + if (!tree) return null tree.text = reverseString(tree.text) tree.children = tree.children.map(child => reverseAST(child)).reverse() return tree @@ -82,15 +83,16 @@ function reverseAST (tree) { const rightCqlParser = new Grammars.W3C.Parser(rightCql) +// we want to associate operators to the left, but we have a right parser. +// so: reverse the grammar and the input string, then reverse the output function parseRight (string, parser) { return reverseAST(parser.getAST(reverseString(string))) } - function parseWithRightCql (string) { return parseRight(string, rightCqlParser) } -function display (ast) { +function parsedASTtoNestedArray (ast) { if (!ast.type.includes('query')) { return ast.text } @@ -102,7 +104,7 @@ function display (ast) { const children = ast.children .filter(child => childTypes.includes(child.type)) - .map(child => display(child)) + .map(child => parsedASTtoNestedArray(child)) if (children.length === 1) { return children[0] @@ -111,8 +113,9 @@ function display (ast) { return children } +// we need to reverse the error message since `parseWithRightCql` doesn't function displayParsed (string) { - const parsed = rightCqlParser.getAST(reverseString(string)) + const parsed = parseWithRightCql(string) if (!parsed) return {} if (parsed.errors.length) { return { @@ -121,7 +124,7 @@ function displayParsed (string) { ).join('\n') } } - return { parsed: display(reverseAST(parsed)) } + return { parsed: parsedASTtoNestedArray(reverseAST(parsed)) } } module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index fa28f9e1..c497ebad 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -105,10 +105,10 @@ function findTopWords (tree) { } /** - For an object where the keys are arrays, apply the given filter and map + For an object where the values are arrays, apply the given filter and map to each of the arrays */ -function nestedMapAndFilter (obj, filter, map) { +function nestedFilterAndMap (obj, filter, map) { return Object.assign({}, ...(Object.entries(obj) .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) @@ -131,25 +131,25 @@ function nestedMapAndFilter (obj, filter, map) { */ function buildAtomic ({ scope, relation, terms, term }) { - const allFields = nestedMapAndFilter( + const allFields = nestedFilterAndMap( indexMapping[scope], field => typeof field === 'string' || field.on(term), field => (typeof field === 'string' ? field : field.field) ) - const bibFields = nestedMapAndFilter( + const bibFields = nestedFilterAndMap( allFields, (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), field => field ) - const itemFields = nestedMapAndFilter( + const itemFields = nestedFilterAndMap( allFields, (field) => field.startsWith('items'), field => field ) - const holdingsFields = nestedMapAndFilter( + const holdingsFields = nestedFilterAndMap( allFields, (field) => field.startsWith('holdings'), field => field @@ -185,9 +185,24 @@ function buildAtomicNested (name, { fields, relation, terms, term }) { - put all terms in term matches with term fields */ function buildAtomicMain ({ fields, relation, terms, term }) { - return anyAllQueries({ fields, relation, terms }) || - adjEqQueries({ fields, relation, terms, term }) || - dateQueries({ fields, relation, terms, term }) + switch (relation) { + case 'any': + case 'all': + return anyAllQueries({ fields, relation, terms }) + case '=': + case '==': + case 'adj': + return adjEqQueries({ fields, relation, terms, term }) + case '>': + case '<': + case '<=': + case '>=': + case 'within': + case 'encloses': + return dateQueries({ fields, relation, terms, term }) + default: + break + } } function anyAllQueries ({ fields, relation, terms }) { @@ -206,16 +221,29 @@ function adjEqQueries ({ fields, relation, terms, term }) { return matchTermWithFields(fields, term, type) } -const table = { +// depending on the type of cql query supplied by the user, +// we may need to modify the es query from the type indicated by the index +// mapping. +// e.g. in case the user indicates a prefix query, all `term` queries should be +// mapped to `prefix` queries +// X represents field types that should be excluded e.g. for exact matching, +// exclude regular fields and use matching `exact_fields` instead +const esQueryMappingByCqlQueryType = { exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } } +// used to turn the above table inside out, e.g. +// in case of queryType = `prefix`, +// will gather together, for a given set of fields, all the query tyoes that +// need to be included under `selector` +// so e.g. `term`, 'prefix', and `exact_fields` fields all need to be included +// in the `prefix` matcher, since they are all mapped to `prefix` in this case const selectFields = (queryType, fields) => (selector) => { return Object.entries(fields) .filter(([fieldType, fieldNames]) => { - return table[queryType][fieldType] === selector + return esQueryMappingByCqlQueryType[queryType][fieldType] === selector }) .map(([fieldType, fieldNames]) => fieldNames) .flat() @@ -306,7 +334,7 @@ module.exports = { buildBoolean, buildAtomic, buildAtomicMain, - nestedMapAndFilter, + nestedFilterAndMap, selectFields, indexMapping } diff --git a/lib/resources.js b/lib/resources.js index 3d7ccf7f..ce1ef036 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -911,7 +911,6 @@ const buildElasticQuery = function (params) { const query = cqlQueryBuilder.buildEsQuery(params.q, request) return query } - console.log('request: ', request) const builder = ElasticQueryBuilder.forApiRequest(request) return builder.query.toJson() From fa9747970affc20a437f815a6ecf7731650a9a68 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:28:01 -0400 Subject: [PATCH 40/54] Update collection entities to include locationsPath, bump nypl-core-objects to 3.0.5 --- lib/jsonld_serializers.js | 4 +++- lib/vocabularies.js | 3 ++- package-lock.json | 40 ++++++++++++++++++++++++++------ package.json | 2 +- test/resource_serializer.test.js | 2 ++ test/vocabularies.test.js | 2 +- 6 files changed, 42 insertions(+), 11 deletions(-) diff --git a/lib/jsonld_serializers.js b/lib/jsonld_serializers.js index 300d42bd..a2d690b5 100644 --- a/lib/jsonld_serializers.js +++ b/lib/jsonld_serializers.js @@ -301,12 +301,14 @@ ResourceSerializer.getFormattedFormat = function (formatId) { ResourceSerializer.formatCollection = function (collectionId) { const prefLabel = nyplCore.collections()[`nyplCollection:${collectionId}`]?.label + const locationsPath = nyplCore.collections()[`nyplCollection:${collectionId}`]?.locationsPath const buildingLocationLabel = buildingLocations.find((loc) => loc.value === collectionId.slice(0, 2))?.label if (!prefLabel) return null return { '@id': collectionId, prefLabel, - buildingLocationLabel + buildingLocationLabel, + locationsPath } } diff --git a/lib/vocabularies.js b/lib/vocabularies.js index b7627e48..451f1e81 100644 --- a/lib/vocabularies.js +++ b/lib/vocabularies.js @@ -17,7 +17,8 @@ module.exports = function (app, _private = null) { (val) => ({ value: val.code, label: val.label, - holdingLocations: val.holdingLocations + holdingLocations: val.holdingLocations, + locationsPath: val.locationsPath }) ) return ({ formats, collections, languages: languages.values, buildingLocations }) diff --git a/package-lock.json b/package-lock.json index e142a8e2..12741fbc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "dependencies": { "@aws-sdk/client-kms": "^3.948.0", "@elastic/elasticsearch": "~8.12.0", - "@nypl/nypl-core-objects": "3.0.4", + "@nypl/nypl-core-objects": "3.0.5", "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", @@ -3588,9 +3588,9 @@ } }, "node_modules/@nypl/nypl-core-objects": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.4.tgz", - "integrity": "sha512-xFo14urEMO2VUL2/YcjG3io1deU/UCKtYlI2ADCOTEwOcCpwzzvkvOvTI2E2CvgstkHplVejBjdvd4l6ak0inQ==", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.5.tgz", + "integrity": "sha512-XXQ15ekqBgn/XX9eNmU40EE+OpIYSyTkq/TdAEhmTRiHWC+tohT8cfDNABz8lH+xnuqiwam6Fmvla3g5KI2DHQ==", "dependencies": { "axios": "^1.6.8", "csv": "^5.3.2", @@ -7245,9 +7245,9 @@ "license": "MIT" }, "node_modules/follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", "funding": [ { "type": "individual", @@ -10379,6 +10379,27 @@ "node": ">=10" } }, + "node_modules/research-catalog-indexer/node_modules/@nypl/nypl-core-objects": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.4.tgz", + "integrity": "sha512-xFo14urEMO2VUL2/YcjG3io1deU/UCKtYlI2ADCOTEwOcCpwzzvkvOvTI2E2CvgstkHplVejBjdvd4l6ak0inQ==", + "dependencies": { + "axios": "^1.6.8", + "csv": "^5.3.2", + "csv-stringify": "^5.6.0", + "just-flatten": "^1.0.0" + } + }, + "node_modules/research-catalog-indexer/node_modules/@nypl/nypl-core-objects/node_modules/axios": { + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.6.tgz", + "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/research-catalog-indexer/node_modules/@nypl/scsb-rest-client": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/@nypl/scsb-rest-client/-/scsb-rest-client-2.0.0.tgz", @@ -10397,6 +10418,11 @@ "form-data": "^4.0.0" } }, + "node_modules/research-catalog-indexer/node_modules/csv-stringify": { + "version": "5.6.5", + "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-5.6.5.tgz", + "integrity": "sha512-PjiQ659aQ+fUTQqSrd1XEDnOr52jh30RBurfzkscaE2tPaFsDH5wOAHJiw8XAHphRknCwMUE9KRayc4K/NbO8A==" + }, "node_modules/research-catalog-indexer/node_modules/debug": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", diff --git a/package.json b/package.json index 3383b465..a1cec83f 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "dependencies": { "@aws-sdk/client-kms": "^3.948.0", "@elastic/elasticsearch": "~8.12.0", - "@nypl/nypl-core-objects": "3.0.4", + "@nypl/nypl-core-objects": "3.0.5", "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", diff --git a/test/resource_serializer.test.js b/test/resource_serializer.test.js index be0427e6..5924b72b 100644 --- a/test/resource_serializer.test.js +++ b/test/resource_serializer.test.js @@ -9,11 +9,13 @@ describe('Resource Serializer', () => { { '@id': 'mal', buildingLocationLabel: 'Stephen A. Schwarzman Building (SASB)', + locationsPath: 'locations/schwarzman/general-research-division', prefLabel: 'General Research Division' }, { '@id': 'bur', buildingLocationLabel: 'Stavros Niarchos Foundation Library (SNFL)', + locationsPath: 'locations/snfl/yoseloff-business', prefLabel: 'Yoseloff Business Center' } ]) diff --git a/test/vocabularies.test.js b/test/vocabularies.test.js index d3bdb48d..81c87a6e 100644 --- a/test/vocabularies.test.js +++ b/test/vocabularies.test.js @@ -57,6 +57,6 @@ describe('Vocabularies', function () { const results = await app.vocabularies({}, { baseUrl: app.baseUrl }) - expect(results.collections[0]).to.have.keys(['value', 'label', 'holdingLocations']) + expect(results.collections[0]).to.have.keys(['value', 'label', 'holdingLocations', 'locationsPath']) }) }) From 129f9a691f80b034fc75874bddc34a3efa0e6105 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:29:59 -0400 Subject: [PATCH 41/54] One more test --- test/resource_serializer.test.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/resource_serializer.test.js b/test/resource_serializer.test.js index 5924b72b..133671ae 100644 --- a/test/resource_serializer.test.js +++ b/test/resource_serializer.test.js @@ -32,6 +32,7 @@ describe('Resource Serializer', () => { expect(collectionEntity.prefLabel).to.equal('Art & Architecture Collection') expect(collectionEntity['@id']).to.equal('mab') expect(collectionEntity.buildingLocationLabel).to.equal('Stephen A. Schwarzman Building (SASB)') + expect(collectionEntity.locationsPath).to.equal('locations/schwarzman/wallach-division/art-architecture-collection') }) }) describe('.formatItemFilterAggregations()', () => { From af3fdab47faf72787a53d8a1beb2f742b4751c56 Mon Sep 17 00:00:00 2001 From: Emma Mansell <73774046+7emansell@users.noreply.github.com> Date: Fri, 13 Mar 2026 16:35:25 -0400 Subject: [PATCH 42/54] Make getFormattedHoldingLocation more defensive --- lib/jsonld_serializers.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/jsonld_serializers.js b/lib/jsonld_serializers.js index a2d690b5..b3b1ca36 100644 --- a/lib/jsonld_serializers.js +++ b/lib/jsonld_serializers.js @@ -394,14 +394,14 @@ class ItemResourceSerializer extends JsonLdItemSerializer { } static getFormattedHoldingLocation (location) { - const loc = location[0] - const locationId = loc['@id'].split(':')[1] - // Add collection access type from corresponding Core location - const collectionAccessType = - nyplCore.sierraLocations()[locationId]?.collectionAccessType + const loc = Array.isArray(location) ? location[0] : null + if (!loc) return [] + const locationId = loc['@id']?.split(':')[1] + const sierraLocations = nyplCore.sierraLocations() + const collectionAccessType = sierraLocations?.[locationId]?.collectionAccessType return [{ '@id': loc['@id'], - prefLabel: loc.prefLabel, + prefLabel: loc?.prefLabel, collectionAccessType }] } From b6dcd86bc4e396853067c3d7153b9c990a4a0e41 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Mon, 16 Mar 2026 15:29:04 -0400 Subject: [PATCH 43/54] the tests --- .github/workflows/test-and-deploy.yml | 133 +++--------------- .../delivery-locations-by-barcode.test.js | 41 ++++-- .../delivery-locations-constants.js | 10 +- 3 files changed, 55 insertions(+), 129 deletions(-) diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index c9306ac6..a179ba4b 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -2,6 +2,11 @@ name: Unit Tests on: [push] +# Global environment variable based on the branch name +env: + ENV_TAG: ${{ github.ref_name }} + ECR_REPOSITORY: discovery-api + jobs: tests: runs-on: ubuntu-latest @@ -15,114 +20,15 @@ jobs: run: npm ci - name: Unit Tests run: npm test - integration-test-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: tests - if: github.ref == 'refs/heads/qa' - steps: - - uses: actions/checkout@v4 - - name: Set Node version - uses: actions/setup-node@v4 - with: - node-version-file: '.nvmrc' - - name: Install dependencies - run: npm ci - - name: Start service - run: ENV=qa npm start & - - name: Run tests - run: npm run test-integration - deploy-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa' - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa --service discovery-api-qa --force-new-deployment - deploy-qa2: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa2' - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa2-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa2 --service discovery-api-qa2 --force-new-deployment - deploy-production: + deploy: permissions: id-token: write contents: read runs-on: ubuntu-latest needs: tests - if: github.ref == 'refs/heads/production' + # Only run if it's one of our three deployment branches + if: contains(fromJSON('["qa", "qa2", "production"]'), github.ref_name) steps: - name: Checkout repo uses: actions/checkout@v3 @@ -138,22 +44,23 @@ jobs: uses: aws-actions/amazon-ecr-login@v1 - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "production-previous" --image-manifest "$MANIFEST"; fi + MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') + PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') + if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then + aws ecr put-image --repository-name ${{ env.ECR_REPOSITORY }} --image-tag "${{ env.ENV_TAG }}-previous" --image-manifest "$MANIFEST" + fi + - name: Build, tag, and push image to Amazon ECR env: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api IMAGE_TAG: ${{ github.sha }} run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:production-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:production-latest + docker build -t $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG . + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG + docker tag $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + - name: Force ECS Update run: | - aws ecs update-service --cluster discovery-api-production --service discovery-api-production --force-new-deployment + aws ecs update-service --cluster discovery-api-${{ env.ENV_TAG }} --service discovery-api-${{ env.ENV_TAG }} --force-new-deployment \ No newline at end of file diff --git a/test/integration/delivery-locations-by-barcode.test.js b/test/integration/delivery-locations-by-barcode.test.js index 2302b12b..9da42420 100644 --- a/test/integration/delivery-locations-by-barcode.test.js +++ b/test/integration/delivery-locations-by-barcode.test.js @@ -1,17 +1,25 @@ -require('dotenv').config('config/qa.env') -const axios = require('axios') +const { loadConfig } = require('../../lib/load-config') const { expectations, ptypes } = require('./delivery-locations-constants') +const { makeNyplDataApiClient } = require('../../lib/data-api-client') const checkLocationsForPtype = async (ptype) => { const problems = [] const match = [] - await Promise.all(Object.values(expectations).map(async (expectation) => { - const deliveryLocationsFromApi = await getDeliveryLocations(expectation.barcode, ptypes[ptype]) + + await Promise.all(Object.entries(expectations).map(async ([holdingLocation, expectation], i) => { + let deliveryLocationsFromApi let totalMatch = true const registerProblem = (problem) => { - problems.push({ barcode: expectation.barcode, deliveryLocationsFromApi, ...problem }) + problems.push({ holdingLocation, barcode: expectation.barcode, deliveryLocationsFromApi, ...problem }) totalMatch = false } + try { + deliveryLocationsFromApi = await getDeliveryLocations(expectation.barcode, ptypes[ptype]) + } catch (e) { + registerProblem({ lookUpFailed: true }) + return + } + const checkForValue = (expectedValue, action) => { const includedValueIncluded = deliveryLocationsFromApi.some((label) => label.includes(expectedValue)) const match = action === 'include' ? includedValueIncluded : !includedValueIncluded @@ -27,20 +35,31 @@ const checkLocationsForPtype = async (ptype) => { } const getDeliveryLocations = async (barcode, patronId) => { - const { data: { itemListElement: deliveryLocationsPerRecord } } = await axios.get(`http://localhost:8082/api/v0.1/request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) - // per record - return deliveryLocationsPerRecord[0] - .deliveryLocation.map(loc => loc.prefLabel.toLowerCase()) + try { + // const { data: { itemListElement: deliveryLocationsPerRecord } } = await axios.get(`https://${process.env === 'qa' ? 'qa-' : ''}platform.nypl.org/api/v0.1/request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) + const { itemListElement: itemData } = await makeNyplDataApiClient().get(`request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) + // per record + return itemData[0] + .deliveryLocation.map(loc => loc.prefLabel.toLowerCase()) + } catch (e) { + console.error(e) + } } const theThing = async () => { + await loadConfig() const results = await Promise.all(Object.keys(ptypes).map((checkLocationsForPtype))) - Object.keys(ptypes).forEach((ptype, i) => { + const resultsHaveProblems = Object.keys(ptypes).some((ptype, i) => { const resultsForPtype = results[i] if (resultsForPtype.problems.length) { console.error(`Error with ${ptype} ptype delivery results, `, resultsForPtype.problems) - } else console.log(`All delivery location checks for ${ptype} patron type successful`) + return true + } else { + console.log(`All delivery location checks for ${ptype} patron type successful`) + return false + } }) + if (resultsHaveProblems) throw new Error('Delivery location checks failed.') } theThing() diff --git a/test/integration/delivery-locations-constants.js b/test/integration/delivery-locations-constants.js index b46d812c..bf079333 100644 --- a/test/integration/delivery-locations-constants.js +++ b/test/integration/delivery-locations-constants.js @@ -38,11 +38,11 @@ const expectations = { scholar: { includes: [schomburg], excludes: [scholar, sasb, lpa] }, general: { includes: [schomburg], excludes: [scholar, sasb, lpa] } }, - // nyplM1: { - // barcode: null, - // scholar: { includes: [sasb], excludes: [scholar, lpa, schomburg] }, - // general: { includes: [sasb], excludes: [scholar, lpa, schomburg] } - // }, + nyplM1: { + barcode: '33433084847221', + scholar: { includes: [sasb], excludes: [scholar, lpa, schomburg] }, + general: { includes: [sasb], excludes: [scholar, lpa, schomburg] } + }, nyplM2: { barcode: '33333069027734', scholar: { includes: [sasb, scholar], excludes: [lpa, schomburg] }, From 3aa92cd32e70effed78a390442e75267a1e8f570 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Mon, 16 Mar 2026 15:32:56 -0400 Subject: [PATCH 44/54] add int test file --- .github/workflows/integration-tests.yml | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/integration-tests.yml diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 00000000..f9d5f765 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,30 @@ +name: integration-test + +on: + pull_request: + branches: + - production + +jobs: + integration-test: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole + aws-region: us-east-1 + - uses: actions/checkout@v3 + - name: Set Node version + uses: actions/setup-node@v3 + with: + node-version-file: '.nvmrc' + - name: npm install + run: npm i + - name: Run integration tests + env: + ENV: 'qa' + run: node test/integration/delivery-locations-by-barcode.test.js From b62d8467b3c72929707e0bf000de2a98d0341096 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Mon, 16 Mar 2026 15:42:38 -0400 Subject: [PATCH 45/54] rm commented code --- test/integration/delivery-locations-by-barcode.test.js | 1 - 1 file changed, 1 deletion(-) diff --git a/test/integration/delivery-locations-by-barcode.test.js b/test/integration/delivery-locations-by-barcode.test.js index 9da42420..23245370 100644 --- a/test/integration/delivery-locations-by-barcode.test.js +++ b/test/integration/delivery-locations-by-barcode.test.js @@ -36,7 +36,6 @@ const checkLocationsForPtype = async (ptype) => { const getDeliveryLocations = async (barcode, patronId) => { try { - // const { data: { itemListElement: deliveryLocationsPerRecord } } = await axios.get(`https://${process.env === 'qa' ? 'qa-' : ''}platform.nypl.org/api/v0.1/request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) const { itemListElement: itemData } = await makeNyplDataApiClient().get(`request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) // per record return itemData[0] From 8f07bfca68f5a14e16f2f3be644b1e1aa1b3e39c Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Wed, 18 Mar 2026 10:44:56 -0400 Subject: [PATCH 46/54] test From d5ec597185d49be4ac5a3dea203054bdd246923c Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 11:53:44 -0400 Subject: [PATCH 47/54] Add check for whether query has fields before adding --- lib/elasticsearch/cql_query_builder.js | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index c497ebad..542ad83b 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -106,7 +106,7 @@ function findTopWords (tree) { /** For an object where the values are arrays, apply the given filter and map - to each of the arrays + to each of the arrays. */ function nestedFilterAndMap (obj, filter, map) { return Object.assign({}, @@ -115,6 +115,14 @@ function nestedFilterAndMap (obj, filter, map) { ) ) } + +/** + Return truthy value if and only if one of the values is a non-empty array + */ +function hasFields (obj) { + return Object.values(obj).some(arr => arr.length) +} + /** build atomic: - identify the scope fields that match the term @@ -155,13 +163,15 @@ function buildAtomic ({ scope, relation, terms, term }) { field => field ) + console.log('holdingsFields: ', holdingsFields) + return { bool: { should: [ buildAtomicMain({ fields: bibFields, relation, terms, term }), - buildAtomicNested('items', { fields: itemFields, relation, terms, term }), - buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term }) - ] + (hasFields(itemFields) && buildAtomicNested('items', { fields: itemFields, relation, terms, term })), + (hasFields(holdingsFields) && buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term })) + ].filter(x => x) } } } From f53c870f957abed778a3db124e95f9b9b3b6ef66 Mon Sep 17 00:00:00 2001 From: Vera Kahn Date: Thu, 19 Mar 2026 12:49:24 -0400 Subject: [PATCH 48/54] expect scholar delivery for m1 item/scholar --- test/integration/delivery-locations-constants.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/delivery-locations-constants.js b/test/integration/delivery-locations-constants.js index bf079333..6b70cfd7 100644 --- a/test/integration/delivery-locations-constants.js +++ b/test/integration/delivery-locations-constants.js @@ -40,7 +40,7 @@ const expectations = { }, nyplM1: { barcode: '33433084847221', - scholar: { includes: [sasb], excludes: [scholar, lpa, schomburg] }, + scholar: { includes: [sasb, scholar], excludes: [lpa, schomburg] }, general: { includes: [sasb], excludes: [scholar, lpa, schomburg] } }, nyplM2: { From 4f26b201010d2a688c157d4f2cf3d6e5ad35e02b Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:21:56 -0400 Subject: [PATCH 49/54] Remove console log --- lib/elasticsearch/cql_query_builder.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 542ad83b..87ef7f84 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -163,8 +163,6 @@ function buildAtomic ({ scope, relation, terms, term }) { field => field ) - console.log('holdingsFields: ', holdingsFields) - return { bool: { should: [ From 94d0d610a14b81bc3262bd2b40ca34b3b7ffc94b Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:29:24 -0400 Subject: [PATCH 50/54] Fix tests --- test/fixtures/cql_fixtures.js | 253 +--------------------------------- 1 file changed, 6 insertions(+), 247 deletions(-) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 555e0a45..bc35e24a 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -33,12 +33,6 @@ const simpleAdjQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -82,12 +76,6 @@ const multiAdjQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -162,12 +150,6 @@ const prefixPhraseQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -247,32 +229,6 @@ const simpleAnyQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -416,36 +372,6 @@ const anyWithPrefixQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - should: [ - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -525,32 +451,6 @@ const simpleAllQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { - bool: { - must: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } - }, - { - nested: { - path: 'holdings', - query: { - bool: { - must: [ - { bool: { should: [] } }, - { bool: { should: [] } } - ] - } - } - } } ] } @@ -617,9 +517,6 @@ const keywordQueryForBarcode = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -694,9 +591,6 @@ const keywordQueryForShelfMark = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -753,12 +647,6 @@ const keywordQueryForGeneralTerm = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -799,9 +687,6 @@ const identifierQuery = { } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -837,15 +722,6 @@ const binaryBooleanQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -860,15 +736,6 @@ const binaryBooleanQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -910,18 +777,6 @@ const ternaryBooleanQuery = { } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -936,18 +791,6 @@ const ternaryBooleanQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -970,15 +813,6 @@ const ternaryBooleanQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1017,15 +851,6 @@ const queryWithParentheses = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1043,18 +868,6 @@ const queryWithParentheses = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1074,18 +887,6 @@ const queryWithParentheses = { } ] } - }, - { - nested: { - path: 'items', - query: { bool: { should: [] } } - } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1127,15 +928,6 @@ const negationQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1152,15 +944,6 @@ const negationQuery = { { term: { 'language.label': 'English' } } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { - path: 'holdings', - query: { bool: { should: [] } } - } } ] } @@ -1185,9 +968,7 @@ const dateAfterQuery = { query: { range: { 'dates.range': { gt: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1208,9 +989,7 @@ const dateBeforeQuery = { query: { range: { 'dates.range': { lt: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1231,9 +1010,7 @@ const dateBeforeOrOnQuery = { query: { range: { 'dates.range': { lte: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1254,9 +1031,7 @@ const dateAfterOrOnQuery = { query: { range: { 'dates.range': { gte: '1990' } } } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1279,9 +1054,7 @@ const dateWithinQuery = { } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1304,9 +1077,7 @@ const dateEnclosesQuery = { } } } - ], - { nested: { path: 'items', query: null } }, - { nested: { path: 'holdings', query: null } } + ] ] } } @@ -1338,12 +1109,6 @@ const filterQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } @@ -1393,12 +1158,6 @@ const exactMatchQuery = { } ] } - }, - { - nested: { path: 'items', query: { bool: { should: [] } } } - }, - { - nested: { path: 'holdings', query: { bool: { should: [] } } } } ] } From e0e8f6ba4eb3befa53259753fd0b0ca8252b4dee Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 15:49:37 -0400 Subject: [PATCH 51/54] Add reversing strings in nested array --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 87b8c042..84815c35 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -94,7 +94,7 @@ function parseWithRightCql (string) { function parsedASTtoNestedArray (ast) { if (!ast.type.includes('query')) { - return ast.text + return reverseString(ast.text) } const childTypes = [ From d65a3d377cd9d3d4844842c567d2814409ce1936 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 19 Mar 2026 16:56:39 -0400 Subject: [PATCH 52/54] Remove double reversing in display --- lib/elasticsearch/cql_grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 84815c35..81f9aafc 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -124,7 +124,7 @@ function displayParsed (string) { ).join('\n') } } - return { parsed: parsedASTtoNestedArray(reverseAST(parsed)) } + return { parsed: parsedASTtoNestedArray(parsed) } } module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } From 330e170359e1af561c2309924f20a3ef9674cb50 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Mar 2026 12:09:47 -0400 Subject: [PATCH 53/54] Fix double nesting of should array for dates --- lib/elasticsearch/cql_query_builder.js | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 87ef7f84..0f10f9bf 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -302,18 +302,16 @@ function dateQueries ({ fields, relation, terms, term }) { break } - return [ - { - nested: { - path: 'dates', - query: { - range: { - 'dates.range': range - } + return { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range } } } - ] + } } function termQuery (field, term) { From b0f75851200764b5bb3e924b3e67bb98ba345c1d Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 20 Mar 2026 12:12:15 -0400 Subject: [PATCH 54/54] Fix date test fixtures --- test/fixtures/cql_fixtures.js | 76 +++++++++++++++-------------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index bc35e24a..552333c0 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -961,14 +961,12 @@ const dateAfterQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gt: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } } - ] + } ] } } @@ -982,14 +980,12 @@ const dateBeforeQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lt: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } } - ] + } ] } } @@ -1003,14 +999,12 @@ const dateBeforeOrOnQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { lte: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } } - ] + } ] } } @@ -1024,14 +1018,12 @@ const dateAfterOrOnQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { range: { 'dates.range': { gte: '1990' } } } - } + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } } - ] + } ] } } @@ -1045,16 +1037,14 @@ const dateWithinQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gte: '1990', lte: '2000' } } - } + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } } } - ] + } ] } } @@ -1068,16 +1058,14 @@ const dateEnclosesQuery = { { bool: { should: [ - [ - { - nested: { - path: 'dates', - query: { - range: { 'dates.range': { gt: '1990', lt: '2000' } } - } + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } } } - ] + } ] } }