From d4e66c7d09c2d8647322ae4484372bd019eee008 Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 1 Apr 2026 12:57:20 -0400 Subject: [PATCH 01/21] Fix tests for cql grammar --- lib/elasticsearch/cql_grammar.js | 5 ++++- test/cql_grammar.test.js | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 81f9aafc..9e8ced76 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -18,7 +18,8 @@ const leftCql = ` query ::= query whitespace connective whitespace sub_query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" sub_query ::= atomic_query | "(" query ")" - atomic_query ::= scope relation quoted_term + atomic_query ::= scope relation search_term + search_term ::= quoted_term | word scope ::= scope_term whitespace | scope_term relation ::= relation_term whitespace | relation_term scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" @@ -56,6 +57,8 @@ function simplify (ast) { return ast.text case 'relation_term': return ast.text + case 'search_term': + return simplify(ast.children.find(child => child.type.includes('quoted_term') || child.type === 'word')) case 'quoted_term': return simplify(ast.children.find(child => child.type.includes('phrase'))) case 'phrase': { diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index 24d5d5d6..be1f6fa5 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -16,8 +16,8 @@ function validateAtomicQuery (parsed, scope, relation, quotedTerm) { const relationNode = atomicQuery.children.find(child => child.type === 'relation') const relationTerm = relationNode.children.find(child => child.type === 'relation_term') expect(relationTerm.text).to.equal(relation) - const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term') - 
expect(quotedTermNode.text).to.equal(quotedTerm) + const searchTermNode = atomicQuery.children.find(child => child.type === 'search_term') + expect(searchTermNode.text).to.equal(quotedTerm) } describe('CQL Grammar', function () { From 8adffea0c54cdabf51d742600d56e95efbdb3dcd Mon Sep 17 00:00:00 2001 From: danamansana Date: Wed, 1 Apr 2026 13:16:42 -0400 Subject: [PATCH 02/21] &ai Update grammar and tests - Grammar now allows for unquoted single word queries - Special characters are allowed only in quoted queries - Update tests to reflect this --- lib/elasticsearch/cql_grammar.js | 7 +++++-- lib/elasticsearch/cql_query_builder.js | 2 +- test/cql_grammar.test.js | 10 ++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9e8ced76..73f99cca 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -19,7 +19,7 @@ const leftCql = ` connective ::= "AND NOT" | "AND" | "OR" | "NOT" sub_query ::= atomic_query | "(" query ")" atomic_query ::= scope relation search_term - search_term ::= quoted_term | word + search_term ::= quoted_term | unquoted_word scope ::= scope_term whitespace | scope_term relation ::= relation_term whitespace | relation_term scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" @@ -29,6 +29,8 @@ const leftCql = ` whitespace ::= [#x20#x09#x0A#x0D]+ word ::= word escaped_char | word regular_char | escaped_char | regular_char regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + unquoted_word ::= unquoted_word escaped_char | unquoted_word unquoted_char | escaped_char | unquoted_char + unquoted_char ::= [^#x22#x5c#x20#x09#x0A#x0D=<>()] escaped_char ::= slash char slash ::= [#x5c] char ::= [a-z]|[^a-z] @@ -58,7 +60,7 @@ function simplify (ast) { case 'relation_term': return ast.text case 'search_term': - return 
simplify(ast.children.find(child => child.type.includes('quoted_term') || child.type === 'word')) + return simplify(ast.children.find(child => child.type.includes('quoted_term') || child.type.includes('word'))) case 'quoted_term': return simplify(ast.children.find(child => child.type.includes('phrase'))) case 'phrase': { @@ -67,6 +69,7 @@ function simplify (ast) { return [simplify(word)].concat(phrase ? simplify(phrase) : []) } case 'word': + case 'unquoted_word': return ast.text default: break diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 0f10f9bf..db3392a3 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -100,7 +100,7 @@ function findTopPhrase (tree) { of word nodes for H, Ha, Ham, etc... */ function findTopWords (tree) { - if (tree.type === 'word') return [tree.text] + if (tree.type === 'word' || tree.type === 'unquoted_word') return [tree.text] return tree.children.map(child => findTopWords(child)).flat() } diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js index be1f6fa5..014652c2 100644 --- a/test/cql_grammar.test.js +++ b/test/cql_grammar.test.js @@ -29,6 +29,16 @@ describe('CQL Grammar', function () { validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"') }) + it('parses single-word atomic queries without quotes', function () { + validateAtomicQuery(parseWithRightCql('title=hamlet'), 'title', '=', 'hamlet') + }) + + it('parses quoted queries containing special characters', function () { + validateAtomicQuery(parseWithRightCql('title="hamlet=prince"'), 'title', '=', '"hamlet=prince"') + validateAtomicQuery(parseWithRightCql('date > "1990 > 1980"'), 'date', '>', '"1990 > 1980"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare (william)"'), 'author', 'adj', '"shakespeare (william)"') + }) + it('allows whitespace variants', function () { 
validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"') validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"') From 8bcab301bc8c9722e60afaa8bbc493a8f2757ad1 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 11:51:26 -0400 Subject: [PATCH 03/21] &ai Add case insensitivity in grammar operators --- lib/elasticsearch/cql_grammar.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 73f99cca..fb80b4a0 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -36,8 +36,15 @@ const leftCql = ` char ::= [a-z]|[^a-z] quote ::= [#x22] ` +function makeCaseInsensitiveLiterals (grammar) { + // Transform literals (e.g. "and not") into case-insensitive EBNF matches + return grammar.replace(/"([a-zA-Z ]+)"/g, (match, p1) => { + return p1.split('').map(c => c === ' ' ? 'whitespace' : `[${c.toLowerCase()}${c.toUpperCase()}]`).join(' ') + }) +} -const rightCql = reverseGrammar(leftCql) +const processedLeftCql = makeCaseInsensitiveLiterals(leftCql) +const rightCql = reverseGrammar(processedLeftCql) function simplify (ast) { switch (ast.type) { From 6cac1c15f455f2b336a376df5b79d0cc33f333d5 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 12:29:33 -0400 Subject: [PATCH 04/21] Add support for case-insensitive scope/connective/relation --- lib/elasticsearch/cql_grammar.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index fb80b4a0..1dbd6050 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -17,7 +17,7 @@ function reverseGrammar (grammar) { const leftCql = ` query ::= query whitespace connective whitespace sub_query | sub_query connective ::= "AND NOT" | "AND" | "OR" | "NOT" - sub_query ::= atomic_query | "(" query ")" + sub_query ::= atomic_query | 
lparen_space query rparen_space atomic_query ::= scope relation search_term search_term ::= quoted_term | unquoted_word scope ::= scope_term whitespace | scope_term @@ -35,6 +35,10 @@ const leftCql = ` slash ::= [#x5c] char ::= [a-z]|[^a-z] quote ::= [#x22] + lparen_space ::= lparen whitespace | lparen + rparen_space ::= whitespace rparen | rparen + lparen ::= [#x28] + rparen ::= [#x29] ` function makeCaseInsensitiveLiterals (grammar) { // Transform literals (e.g. "and not") into case-insensitive EBNF matches From 29351f85c74d6e312fbbeb1231bf932bed0dcfa3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 12:37:59 -0400 Subject: [PATCH 05/21] Add support for upper and lowercase in cql reserved terms --- lib/elasticsearch/cql_query_builder.js | 5 +++-- test/cql_query_builder.test.js | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index db3392a3..b983e97a 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -52,6 +52,7 @@ function buildEsQueryFromTree (tree) { } function buildBoolean (operator, queries) { + operator = operator.toUpperCase() if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries) const esOperator = operator === 'AND' ? 'must' : 'should' return { @@ -139,6 +140,8 @@ function hasFields (obj) { */ function buildAtomic ({ scope, relation, terms, term }) { + scope = scope.toLowerCase() + relation = relation.toLowerCase() const allFields = nestedFilterAndMap( indexMapping[scope], field => typeof field === 'string' || field.on(term), @@ -214,7 +217,6 @@ function buildAtomicMain ({ fields, relation, terms, term }) { } function anyAllQueries ({ fields, relation, terms }) { - if (!['any', 'all'].includes(relation)) { return null } const operator = (relation === 'any' ? 
'should' : 'must') return { bool: { @@ -224,7 +226,6 @@ function anyAllQueries ({ fields, relation, terms }) { } function adjEqQueries ({ fields, relation, terms, term }) { - if (!['=', '==', 'adj'].includes(relation)) { return null } const type = (relation === '==') ? 'exact' : 'phrase' return matchTermWithFields(fields, term, type) } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index cf856b5c..7d122b69 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -196,4 +196,11 @@ describe('CQL Query Builder', function () { exactMatchQuery ) }) + + it('Handles query with funny casing', function () { + expect(buildEsQuery('AuThOr = "Shakespeare" aNd LaNgUaGe = "English"')) + .to.deep.equal( + binaryBooleanQuery + ) + }) }) From 670561c490510dce27c484b3170a32050cd398d3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 12:53:29 -0400 Subject: [PATCH 06/21] Add CqlQuery class --- lib/elasticsearch/cql_grammar.js | 16 +------ lib/elasticsearch/cql_query_builder.js | 49 +++++++++++++++------ lib/resources.js | 17 ++++---- test/cql_query_builder.test.js | 59 ++++++++++++++------------ 4 files changed, 80 insertions(+), 61 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 1dbd6050..9efe2415 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -130,18 +130,4 @@ function parsedASTtoNestedArray (ast) { return children } -// we need to reverse the error message since `parseWithRightCql` doesn't -function displayParsed (string) { - const parsed = parseWithRightCql(string) - if (!parsed) return {} - if (parsed.errors.length) { - return { - error: parsed.errors.map(error => - `Parsing error likely near end of "${reverseString(error.token.rest)}"` - ).join('\n') - } - } - return { parsed: parsedASTtoNestedArray(parsed) } -} - -module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, 
rightCqlParser, reverseString, displayParsed } +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, parsedASTtoNestedArray } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index b983e97a..385010e9 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,18 +1,43 @@ -const { parseWithRightCql } = require('./cql_grammar') +const { parseWithRightCql, reverseString, parsedASTtoNestedArray } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') -function buildEsQuery (cqlQuery, request = null) { - const filterQuery = buildFilterQuery(request) - return { - bool: { - should: [ - buildEsQueryFromTree( - parseWithRightCql(cqlQuery.trim()) - ) - ], - ...filterQuery +class CqlQuery { + constructor (queryStr) { + this.queryStr = (queryStr || '').trim() + this.parsedAST = null + } + + parse () { + if (!this.parsedAST) { + this.parsedAST = parseWithRightCql(this.queryStr) + } + return this.parsedAST + } + + buildEsQuery (request = null) { + const filterQuery = buildFilterQuery(request) + return { + bool: { + should: [ + buildEsQueryFromTree(this.parse(), this.queryStr) + ], + ...filterQuery + } + } + } + + displayParsed () { + const parsed = this.parse() + if (!parsed) return {} + if (parsed.errors && parsed.errors.length) { + return { + error: parsed.errors.map(error => + `Parsing error likely near end of "${reverseString(error.token.rest)}"` + ).join('\n') + } } + return { parsed: parsedASTtoNestedArray(parsed) } } } @@ -336,7 +361,7 @@ function multiMatch (fields, term, type) { } module.exports = { - buildEsQuery, + CqlQuery, buildEsQueryFromTree, buildBoolean, buildAtomic, diff --git a/lib/resources.js b/lib/resources.js index 5c6dd4e9..95ad8774 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,8 +19,7 @@ const { 
parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') -const { displayParsed } = require('./elasticsearch/cql_grammar') +const { CqlQuery } = require('./elasticsearch/cql_query_builder') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -651,10 +650,12 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) let parsed = {} + let cqlQuery = null if (params.search_scope === 'cql') { + cqlQuery = new CqlQuery(params.q) try { - parsed = displayParsed(params.q) // ? + parsed = cqlQuery.displayParsed() // ? } catch (e) { throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') } @@ -666,7 +667,7 @@ module.exports = function (app, _private = null) { } } - let body = buildElasticBody(params) + let body = buildElasticBody(params, cqlQuery) // Strip unnecessary _source fields body._source = { @@ -878,13 +879,13 @@ module.exports = function (app, _private = null) { * * @return {object} An object that can be posted directly to ES */ -const buildElasticBody = function (params) { +const buildElasticBody = function (params, cqlQuery = null) { const body = { from: (params.per_page * (params.page - 1)), size: params.per_page } - body.query = buildElasticQuery(params) + body.query = buildElasticQuery(params, cqlQuery) // Apply sort: let direction @@ -910,10 +911,10 @@ const buildElasticBody = function (params) { * * @return {object} ES query object suitable to be POST'd to ES endpoint */ -const buildElasticQuery = function (params) { +const buildElasticQuery = function (params, cqlQuery = null) { const request = ApiRequest.fromParams(params) if (params.search_scope === 'cql') { - const query = cqlQueryBuilder.buildEsQuery(params.q, request) + 
const query = (cqlQuery || new CqlQuery(params.q)).buildEsQuery(request) return query } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 7d122b69..e1ae789b 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -1,6 +1,6 @@ const { expect } = require('chai') -const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const { CqlQuery } = require('../lib/elasticsearch/cql_query_builder') const ApiRequest = require('../lib/api-request') const { simpleAdjQuery, @@ -29,154 +29,161 @@ const { describe('CQL Query Builder', function () { it('Simple = query', function () { - expect(buildEsQuery('title="Hamlet"')) + expect(new CqlQuery('title="Hamlet"').buildEsQuery()) .to.deep.equal( simpleAdjQuery ) }) it('Simple adj query', function () { - expect(buildEsQuery('title adj "Hamlet"')) + expect(new CqlQuery('title adj "Hamlet"').buildEsQuery()) .to.deep.equal( simpleAdjQuery ) }) it('Multi-word adj query', function () { - expect(buildEsQuery('title adj "Hamlet, Prince"')) + expect(new CqlQuery('title adj "Hamlet, Prince"').buildEsQuery()) .to.deep.equal( multiAdjQuery ) }) it('Simple any query', function () { - expect(buildEsQuery('title any "Hamlet Othello"')) + expect(new CqlQuery('title any "Hamlet Othello"').buildEsQuery()) .to.deep.equal( simpleAnyQuery ) }) it('Simple all query', function () { - expect(buildEsQuery('title all "Hamlet Othello"')) + expect(new CqlQuery('title all "Hamlet Othello"').buildEsQuery()) .to.deep.equal( simpleAllQuery ) }) it('Prefix phrase query', function () { - expect(buildEsQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"')) + expect(new CqlQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"').buildEsQuery()) .to.deep.equal( prefixPhraseQuery ) }) it('Prefix queries mixed into any query', function () { - expect(buildEsQuery('title any "^Tragedy ^Comedy Hamlet Othello"')) + expect(new CqlQuery('title any "^Tragedy ^Comedy Hamlet 
Othello"').buildEsQuery()) .to.deep.equal( anyWithPrefixQuery ) }) it('Keyword query for barcode', function () { - expect(buildEsQuery('keyword = "123456"')) + expect(new CqlQuery('keyword = "123456"').buildEsQuery()) .to.deep.equal( keywordQueryForBarcode ) }) it('Keyword query for shelfMark', function () { - expect(buildEsQuery('keyword = "B 12"')) + expect(new CqlQuery('keyword = "B 12"').buildEsQuery()) .to.deep.equal( keywordQueryForShelfMark ) }) it('Keyword query for general term', function () { - expect(buildEsQuery('keyword = "Hamlet"')) + expect(new CqlQuery('keyword = "Hamlet"').buildEsQuery()) .to.deep.equal( keywordQueryForGeneralTerm ) }) it('Identifier query', function () { - expect(buildEsQuery('identifier = "b1234"')) + expect(new CqlQuery('identifier = "b1234"').buildEsQuery()) .to.deep.equal( identifierQuery ) }) it('Binary boolean query', function () { - expect(buildEsQuery('author = "Shakespeare" AND language = "English"')) + expect(new CqlQuery('author = "Shakespeare" AND language = "English"').buildEsQuery()) .to.deep.equal( binaryBooleanQuery ) }) it('Ternary boolean query', function () { - expect(buildEsQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"')) + expect(new CqlQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"').buildEsQuery()) .to.deep.equal( ternaryBooleanQuery ) }) it('Boolean query with parentheses', function () { - expect(buildEsQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")')) + expect(new CqlQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")').buildEsQuery()) + .to.deep.equal( + queryWithParentheses + ) + }) + + it('Boolean query with parentheses and whitespace', function () { + expect(new CqlQuery('author = "Shakespeare" AND ( language = "English" OR genre = "tragedy" )').buildEsQuery()) .to.deep.equal( queryWithParentheses ) }) it('Query with NOT', function () { - expect(buildEsQuery('author = "Shakespeare" NOT 
language = "English"')) + expect(new CqlQuery('author = "Shakespeare" NOT language = "English"').buildEsQuery()) .to.deep.equal( negationQuery ) }) it('Query with AND NOT', function () { - expect(buildEsQuery('author = "Shakespeare" AND NOT language = "English"')) + expect(new CqlQuery('author = "Shakespeare" AND NOT language = "English"').buildEsQuery()) .to.deep.equal( negationQuery ) }) it('Date after query', function () { - expect(buildEsQuery('date > "1990"')) + expect(new CqlQuery('date > "1990"').buildEsQuery()) .to.deep.equal( dateAfterQuery ) }) it('Date after or on query', function () { - expect(buildEsQuery('date >= "1990"')) + expect(new CqlQuery('date >= "1990"').buildEsQuery()) .to.deep.equal( dateAfterOrOnQuery ) }) it('Date before query', function () { - expect(buildEsQuery('date < "1990"')) + expect(new CqlQuery('date < "1990"').buildEsQuery()) .to.deep.equal( dateBeforeQuery ) }) it('Date dateBeforeOrOnQuery query', function () { - expect(buildEsQuery('date <= "1990"')) + expect(new CqlQuery('date <= "1990"').buildEsQuery()) .to.deep.equal( dateBeforeOrOnQuery ) }) it('Date within query', function () { - expect(buildEsQuery('date within "1990 2000"')) + expect(new CqlQuery('date within "1990 2000"').buildEsQuery()) .to.deep.equal( dateWithinQuery ) }) it('Date encloses query', function () { - expect(buildEsQuery('date encloses "1990 2000"')) + expect(new CqlQuery('date encloses "1990 2000"').buildEsQuery()) .to.deep.equal( dateEnclosesQuery ) @@ -184,21 +191,21 @@ describe('CQL Query Builder', function () { it('Query with applied filters', function () { const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 'cql' }) - expect(buildEsQuery('author="Shakespeare"', apiRequest)) + expect(new CqlQuery('author="Shakespeare"').buildEsQuery(apiRequest)) .to.deep.equal( filterQuery ) }) it('Exact match query', function () { - expect(buildEsQuery('author == "William Shakespeare"')) + expect(new CqlQuery('author == "William 
Shakespeare"').buildEsQuery()) .to.deep.equal( exactMatchQuery ) }) it('Handles query with funny casing', function () { - expect(buildEsQuery('AuThOr = "Shakespeare" aNd LaNgUaGe = "English"')) + expect(new CqlQuery('AuThOr = "Shakespeare" aNd LaNgUaGe = "English"').buildEsQuery()) .to.deep.equal( binaryBooleanQuery ) From 47232331eb869cd85e35855cddc6ae6287803eed Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 12:54:39 -0400 Subject: [PATCH 07/21] Add more whitespace to whitespace test --- test/cql_query_builder.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index e1ae789b..3f3d4031 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -127,7 +127,7 @@ describe('CQL Query Builder', function () { }) it('Boolean query with parentheses and whitespace', function () { - expect(new CqlQuery('author = "Shakespeare" AND ( language = "English" OR genre = "tragedy" )').buildEsQuery()) + expect(new CqlQuery(' author = "Shakespeare" AND ( language = "English" OR genre = "tragedy" ) ').buildEsQuery()) .to.deep.equal( queryWithParentheses ) From dae24b415427656c6ee588b61e1aaf4690d1fe97 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:10:06 -0400 Subject: [PATCH 08/21] Add unquoted_words as top-level phrases --- lib/elasticsearch/cql_query_builder.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 385010e9..522a18da 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -115,7 +115,7 @@ function atomicQueryParams (atomicQuery) { for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare */ function findTopPhrase (tree) { - if (tree.type === 'phrase') return tree.text + if (tree.type === 'phrase' || tree.type === 'unquoted_word') return tree.text 
const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) return topPhrases.length ? topPhrases[0] : null } From da35b34e89e8fce8dcb204983fdaf1399dfcb7aa Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:11:50 -0400 Subject: [PATCH 09/21] Add test for single word query without quotes --- test/cql_query_builder.test.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 3f3d4031..45a8885b 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -35,6 +35,13 @@ describe('CQL Query Builder', function () { ) }) + it('Simple = query without quotes', function () { + expect(new CqlQuery('title=Hamlet').buildEsQuery()) + .to.deep.equal( + simpleAdjQuery + ) + }) + it('Simple adj query', function () { expect(new CqlQuery('title adj "Hamlet"').buildEsQuery()) .to.deep.equal( From 79ae5eb598e7cbe8b26a8e35b85c95f490d0e09b Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:27:19 -0400 Subject: [PATCH 10/21] Add date validation --- lib/elasticsearch/cql_query_builder.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 522a18da..32985b2e 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -1,6 +1,7 @@ const { parseWithRightCql, reverseString, parsedASTtoNestedArray } = require('./cql_grammar') const { indexMapping } = require('./cql/index-mapping') const ElasticQueryBuilder = require('./elastic-query-builder') +const { InvalidParameterError } = require('../errors') class CqlQuery { constructor (queryStr) { @@ -167,6 +168,14 @@ function hasFields (obj) { function buildAtomic ({ scope, relation, terms, term }) { scope = scope.toLowerCase() relation = relation.toLowerCase() + + if (scope === 'date') { + const dateRegex = /^\d{4}(?:[-/]\d{2})?(?:[-/]\d{2})?$/ + if (!terms.every(t 
=> dateRegex.test(t))) { + throw new InvalidParameterError('Dates must be of the form YYYY, YYYY/MM, or YYYY/MM/DD ') + } + } + const allFields = nestedFilterAndMap( indexMapping[scope], field => typeof field === 'string' || field.on(term), @@ -328,6 +337,8 @@ function dateQueries ({ fields, relation, terms, term }) { break } + + return { nested: { path: 'dates', From dc4b62da6aa89f216f40dd3f918c53c9f7c8f322 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:29:09 -0400 Subject: [PATCH 11/21] &ai add test for date validation --- test/cql_query_builder.test.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 45a8885b..85e85673 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -2,6 +2,7 @@ const { expect } = require('chai') const { CqlQuery } = require('../lib/elasticsearch/cql_query_builder') const ApiRequest = require('../lib/api-request') +const { InvalidParameterError } = require('../lib/errors') const { simpleAdjQuery, simpleAnyQuery, @@ -196,6 +197,12 @@ describe('CQL Query Builder', function () { ) }) + it('Throws InvalidParameterError for invalid date formats', function () { + expect(() => new CqlQuery('date > "199"').buildEsQuery()).to.throw(InvalidParameterError, 'Dates must be of the form YYYY, YYYY/MM, or YYYY/MM/DD ') + expect(() => new CqlQuery('date > "1990/1"').buildEsQuery()).to.throw(InvalidParameterError, 'Dates must be of the form YYYY, YYYY/MM, or YYYY/MM/DD ') + expect(() => new CqlQuery('date > "not-a-date"').buildEsQuery()).to.throw(InvalidParameterError, 'Dates must be of the form YYYY, YYYY/MM, or YYYY/MM/DD ') + }) + it('Query with applied filters', function () { const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 'cql' }) expect(new CqlQuery('author="Shakespeare"').buildEsQuery(apiRequest)) From 230731a377748152f2ff73974c9431a135efee60 Mon Sep 17 00:00:00 2001 From: danamansana 
Date: Thu, 2 Apr 2026 13:38:37 -0400 Subject: [PATCH 12/21] &ai Add tests for displayParsed and fix some bugs --- lib/elasticsearch/cql_grammar.js | 4 ++-- lib/elasticsearch/cql_query_builder.js | 2 +- test/cql_query_builder.test.js | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9efe2415..9042e71c 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -111,12 +111,12 @@ function parseWithRightCql (string) { function parsedASTtoNestedArray (ast) { if (!ast.type.includes('query')) { - return reverseString(ast.text) + return ast.text.trim() } const childTypes = [ 'atomic_query', 'sub_query', 'query', 'connective', - 'scope', 'relation', 'quoted_term' + 'scope', 'relation', 'search_term' ] const children = ast.children diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 32985b2e..ee9d7271 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -30,7 +30,7 @@ class CqlQuery { displayParsed () { const parsed = this.parse() - if (!parsed) return {} + if (!parsed) return { error: 'Unknown parsing error. 
Error most likely near end of string' } if (parsed.errors && parsed.errors.length) { return { error: parsed.errors.map(error => diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 85e85673..6b811a56 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -224,4 +224,20 @@ describe('CQL Query Builder', function () { binaryBooleanQuery ) }) + + describe('displayParsed', function () { + it('returns parsed AST array for valid queries', function () { + const result = new CqlQuery('title="Hamlet"').displayParsed() + expect(result).to.have.property('parsed') + expect(result).to.not.have.property('error') + expect(result.parsed).to.deep.equal(['title', '=', '"Hamlet"']) + }) + + it('returns error message for invalid queries', function () { + const result = new CqlQuery('title="Hamlet" AND').displayParsed() + expect(result).to.have.property('error') + expect(result).to.not.have.property('parsed') + expect(result.error).to.include('parsing error') + }) + }) }) From 5c6542e171d3b7eb16c4a8855e4adda704fe2cdb Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:42:54 -0400 Subject: [PATCH 13/21] &ai add some more tests for displayParsed --- test/cql_query_builder.test.js | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index 6b811a56..d87cc195 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -233,11 +233,33 @@ describe('CQL Query Builder', function () { expect(result.parsed).to.deep.equal(['title', '=', '"Hamlet"']) }) + it('returns parsed AST array for complex queries', function () { + const result = new CqlQuery('author="Shakespeare" AND (language="English" OR genre="tragedy")').displayParsed() + expect(result).to.have.property('parsed') + expect(result).to.not.have.property('error') + expect(result.parsed).to.deep.equal([ + ['author', '=', '"Shakespeare"'], + 'AND', + [ + 
['language', '=', '"English"'], + 'OR', + ['genre', '=', '"tragedy"'] + ] + ]) + }) + it('returns error message for invalid queries', function () { const result = new CqlQuery('title="Hamlet" AND').displayParsed() expect(result).to.have.property('error') expect(result).to.not.have.property('parsed') expect(result.error).to.include('parsing error') }) + + it('returns specific error message for partially valid queries', function () { + const result = new CqlQuery('badscope="Hamlet" AND title="Dogs"').displayParsed() + expect(result).to.have.property('error') + expect(result).to.not.have.property('parsed') + expect(result.error).to.include('Parsing error likely near end of') + }) }) }) From 3b49d335dd258898956b0c7bd3817939cf1dd347 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:44:48 -0400 Subject: [PATCH 14/21] &ai fix linting --- lib/elasticsearch/cql_query_builder.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index ee9d7271..71797d8f 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -337,8 +337,6 @@ function dateQueries ({ fields, relation, terms, term }) { break } - - return { nested: { path: 'dates', From 304c191a0c4b37ffc73f6d544cea10c0cfec8691 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 13:56:42 -0400 Subject: [PATCH 15/21] Remove cql check for addInnerHits --- .python-version | 1 + lib/resources.js | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 .python-version diff --git a/.python-version b/.python-version new file mode 100644 index 00000000..c8cfe395 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/lib/resources.js b/lib/resources.js index 95ad8774..6de1392b 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -674,9 +674,7 @@ module.exports = function (app, _private = null) { excludes: EXCLUDE_FIELDS.concat(['items']) } - if 
(params.search_scope !== 'cql') { - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) - } + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) app.logger.debug('Resources#search', RESOURCES_INDEX, body) From e335227d315ca1394484c7f7ac4df33eaabe6f77 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 15:23:13 -0400 Subject: [PATCH 16/21] Remove python version --- .python-version | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .python-version diff --git a/.python-version b/.python-version deleted file mode 100644 index c8cfe395..00000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.10 From ecb35d01fc259fc9456ae2eb3f54b61fd3dbad47 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 16:25:34 -0400 Subject: [PATCH 17/21] &ai Fix top-level should to must --- lib/elasticsearch/cql_query_builder.js | 2 +- test/fixtures/cql_fixtures.js | 44 +++++++++++++------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index 71797d8f..f8470d7c 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -20,7 +20,7 @@ class CqlQuery { const filterQuery = buildFilterQuery(request) return { bool: { - should: [ + must: [ buildEsQueryFromTree(this.parse(), this.queryStr) ], ...filterQuery diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js index 552333c0..cce72afc 100644 --- a/test/fixtures/cql_fixtures.js +++ b/test/fixtures/cql_fixtures.js @@ -1,6 +1,6 @@ const simpleAdjQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -43,7 +43,7 @@ const simpleAdjQuery = { const multiAdjQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -86,7 +86,7 @@ const multiAdjQuery = { const prefixPhraseQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -160,7 +160,7 @@ const prefixPhraseQuery = { 
const simpleAnyQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -239,7 +239,7 @@ const simpleAnyQuery = { const anyWithPrefixQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -382,7 +382,7 @@ const anyWithPrefixQuery = { const simpleAllQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -461,7 +461,7 @@ const simpleAllQuery = { const keywordQueryForBarcode = { bool: { - should: [ + must: [ { bool: { should: [ @@ -527,7 +527,7 @@ const keywordQueryForBarcode = { const keywordQueryForShelfMark = { bool: { - should: [ + must: [ { bool: { should: [ @@ -601,7 +601,7 @@ const keywordQueryForShelfMark = { const keywordQueryForGeneralTerm = { bool: { - should: [ + must: [ { bool: { should: [ @@ -657,7 +657,7 @@ const keywordQueryForGeneralTerm = { const identifierQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -697,7 +697,7 @@ const identifierQuery = { const binaryBooleanQuery = { bool: { - should: [ + must: [ { bool: { must: [ @@ -749,7 +749,7 @@ const binaryBooleanQuery = { const ternaryBooleanQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -826,7 +826,7 @@ const ternaryBooleanQuery = { const queryWithParentheses = { bool: { - should: [ + must: [ { bool: { must: [ @@ -903,7 +903,7 @@ const queryWithParentheses = { const negationQuery = { bool: { - should: [ + must: [ { bool: { must: [ @@ -957,7 +957,7 @@ const negationQuery = { const dateAfterQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -976,7 +976,7 @@ const dateAfterQuery = { const dateBeforeQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -995,7 +995,7 @@ const dateBeforeQuery = { const dateBeforeOrOnQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -1014,7 +1014,7 @@ const dateBeforeOrOnQuery = { const dateAfterOrOnQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -1033,7 +1033,7 @@ const dateAfterOrOnQuery = { const dateWithinQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ 
-1054,7 +1054,7 @@ const dateWithinQuery = { const dateEnclosesQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -1075,7 +1075,7 @@ const dateEnclosesQuery = { const filterQuery = { bool: { - should: [ + must: [ { bool: { should: [ @@ -1117,7 +1117,7 @@ const filterQuery = { const exactMatchQuery = { bool: { - should: [ + must: [ { bool: { should: [ From 48392a0779e48100428515a5f6ddbe2a50a19c79 Mon Sep 17 00:00:00 2001 From: danamansana Date: Thu, 2 Apr 2026 17:01:18 -0400 Subject: [PATCH 18/21] Remove extraneous comment --- lib/resources.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/resources.js b/lib/resources.js index 6de1392b..4cd606d6 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -655,7 +655,7 @@ module.exports = function (app, _private = null) { if (params.search_scope === 'cql') { cqlQuery = new CqlQuery(params.q) try { - parsed = cqlQuery.displayParsed() // ? + parsed = cqlQuery.displayParsed() } catch (e) { throw new IndexSearchError('Unknown parsing error. 
Error most likely near end of string') } From 19b2e6689a06cdb4400287dc49144ef3c86477d3 Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 3 Apr 2026 11:02:04 -0400 Subject: [PATCH 19/21] Add trimming of whitespace within query terms --- lib/elasticsearch/cql_grammar.js | 9 ++++++--- lib/elasticsearch/cql_query_builder.js | 4 ++-- test/cql_query_builder.test.js | 7 +++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js index 9042e71c..eb6efa86 100644 --- a/lib/elasticsearch/cql_grammar.js +++ b/lib/elasticsearch/cql_grammar.js @@ -25,7 +25,8 @@ const leftCql = ` scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses" quoted_term ::= quote phrase quote - phrase ::= phrase whitespace word | word + phrase ::= phrase whitespace_or_word | whitespace_or_word + whitespace_or_word ::= whitespace | word whitespace ::= [#x20#x09#x0A#x0D]+ word ::= word escaped_char | word regular_char | escaped_char | regular_char regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] @@ -75,10 +76,12 @@ function simplify (ast) { case 'quoted_term': return simplify(ast.children.find(child => child.type.includes('phrase'))) case 'phrase': { - const word = ast.children.find(child => child.type === 'word') + const word = ast.children.find(child => child.type === 'whitespace_or_word') const phrase = ast.children.find(child => child.type === 'phrase') - return [simplify(word)].concat(phrase ? simplify(phrase) : []) + return [simplify(word)].filter(x => x).concat(phrase ? 
simplify(phrase) : []) } + case 'whitespace_or_word': + return simplify(ast.children.find(child => child.type === 'word')) case 'word': case 'unquoted_word': return ast.text diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js index f8470d7c..58fb7a27 100644 --- a/lib/elasticsearch/cql_query_builder.js +++ b/lib/elasticsearch/cql_query_builder.js @@ -116,7 +116,7 @@ function atomicQueryParams (atomicQuery) { for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare */ function findTopPhrase (tree) { - if (tree.type === 'phrase' || tree.type === 'unquoted_word') return tree.text + if (tree.type === 'phrase' || tree.type === 'unquoted_word') return tree.text.trim() const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) return topPhrases.length ? topPhrases[0] : null } @@ -127,7 +127,7 @@ function findTopPhrase (tree) { of word nodes for H, Ha, Ham, etc... */ function findTopWords (tree) { - if (tree.type === 'word' || tree.type === 'unquoted_word') return [tree.text] + if (tree.type === 'word' || tree.type === 'unquoted_word') return [tree.text.trim()] return tree.children.map(child => findTopWords(child)).flat() } diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js index d87cc195..88515494 100644 --- a/test/cql_query_builder.test.js +++ b/test/cql_query_builder.test.js @@ -36,6 +36,13 @@ describe('CQL Query Builder', function () { ) }) + it('Trims whitespace in query terms', function () { + expect(new CqlQuery('title=" Hamlet "').buildEsQuery()) + .to.deep.equal( + simpleAdjQuery + ) + }) + it('Simple = query without quotes', function () { expect(new CqlQuery('title=Hamlet').buildEsQuery()) .to.deep.equal( From a9240d677de1ef3d86766e82ced7a2dd36d077ad Mon Sep 17 00:00:00 2001 From: danamansana Date: Fri, 17 Apr 2026 13:46:11 -0400 Subject: [PATCH 20/21] Add config to exclude marcTag 340 subfield 2 --- data/annotated-marc-rules.json | 2 +- 1
file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/annotated-marc-rules.json b/data/annotated-marc-rules.json index 467d3694..26a5eaff 100644 --- a/data/annotated-marc-rules.json +++ b/data/annotated-marc-rules.json @@ -625,7 +625,7 @@ "marcIndicatorRegExp": "^340", "subfieldSpec": { "subfields": [ - "6" + "6", "2" ], "directive": "exclude" }, From 32caebf1ed5492b374f5a1ce2000bc9eafe72251 Mon Sep 17 00:00:00 2001 From: danamansana Date: Tue, 21 Apr 2026 10:28:10 -0400 Subject: [PATCH 21/21] Add comment about marc rules fork - Add comment to update-annotated-marc-rules script noting that the marc rules have been forked from webpub.def --- scripts/update-annotated-marc-rules.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/update-annotated-marc-rules.js b/scripts/update-annotated-marc-rules.js index 89047644..5626aafb 100755 --- a/scripts/update-annotated-marc-rules.js +++ b/scripts/update-annotated-marc-rules.js @@ -1,6 +1,8 @@ #!/usr/bin/env node /** * This file rebuilds data/annotated-marc-rules.json from data/webpub.def + * Note that currently data/annotated-marc-rules.json intentionally diverges + * from webpub.def, in particular 340 excludes subfield 2 * * Webpub.def is a Sierra configuration file, which controls how specific marc * fields are rendered in the catalog. We use it to build our own "annotated-