diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index a179ba4b..5f46adb2 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -20,6 +20,37 @@ jobs: run: npm ci - name: Unit Tests run: npm test + integration-test-qa: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + needs: + - tests + if: github.ref == 'refs/heads/qa' + steps: + - uses: actions/checkout@v4 + - name: Set Node version + uses: actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + - name: Install dependencies + run: npm ci + - name: Start service + run: ENV=qa npm start & + - name: Run tests + run: node test/integration/delivery-locations-by-barcode.test.js + deploy-qa: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + needs: + - tests + if: github.ref == 'refs/heads/qa' + steps: + - name: Checkout repo + uses: actions/checkout@v3 deploy: permissions: diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index bc9f9ec7..d2a3e0a2 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -73,7 +73,8 @@ const SEARCH_SCOPES = { }, standard_number: { // We do custom field matching for this search-scope - } + }, + cql: {} // see cql/index_mapping for this search scope } const FILTER_CONFIG = { @@ -127,8 +128,47 @@ const AGGREGATIONS_SPEC = { collection: { terms: { field: 'collectionIds' } } } +const ITEM_FILTER_AGGREGATIONS = { + item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, + item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, + item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } +} + +// Configure sort fields: +const SORT_FIELDS = { + title: { + initialDirection: 'asc', + field: 'title_sort' + }, + date: { + 
initialDirection: 'desc', + field: 'dateStartYear' + }, + creator: { + initialDirection: 'asc', + field: 'creator_sort' + }, + relevance: {} +} + +// The following fields can be excluded from ES responses because we don't pass them to client: +const EXCLUDE_FIELDS = [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + // Hide contributor and creator transformed fields: + '*WithoutDates', + '*Normalized' +] + module.exports = { SEARCH_SCOPES, FILTER_CONFIG, - AGGREGATIONS_SPEC + AGGREGATIONS_SPEC, + ITEM_FILTER_AGGREGATIONS, + EXCLUDE_FIELDS, + SORT_FIELDS } diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js new file mode 100644 index 00000000..0befbc87 --- /dev/null +++ b/lib/elasticsearch/cql/index-mapping.js @@ -0,0 +1,138 @@ +const indexMapping = { + keyword: { + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded', + // Try to detect shelfmark searches (e.g. 
JFD 16-5143) + { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + // missing description + 'subjectLiteral.raw', + 'creatorLiteral.keywordLowercased', + 'contributorLiteral.keywordLowercased', + // note.label is missing + 'publisherLiteral.raw', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'genreForm.raw', + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + // parallelPublisher/parallelPublisherLiteral missing + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication', + { field: 'items.shelfMark.raw', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + term: [ + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) } + ] + }, + title: { + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication' + ] + }, + author: { + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 
'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'], + exact_fields: [ + 'creatorLiteral.keywordLowercased', 'contributorLiteral.keywordLowercased', + 'parallelCreatorLiteral.raw', 'parallelContributorLiteral.raw' + ] + }, + callnumber: { + term: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + }, + identifier: { + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], + term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] + }, + subject: { + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'], + exact_fields: ['subjectLiteral.raw'] + }, + language: { term: ['language.id', 'language.label'] }, + date: { fields: ['dates.range'] }, + series: { + term: ['series', 'parallelSeries'] + }, + genre: { fields: ['genreForm'], exact_fields: ['genreForm.raw'] }, + center: { term: ['buildingLocationIds'] }, + division: { term: ['collectionIds'] }, + format: { term: ['formatId'] } +} + +module.exports = { + indexMapping +} diff --git a/lib/elasticsearch/cql_grammar.js b/lib/elasticsearch/cql_grammar.js new file mode 100644 index 00000000..81f9aafc --- /dev/null +++ b/lib/elasticsearch/cql_grammar.js @@ -0,0 +1,130 @@ +const { Grammars } = require('ebnf') + +function reverseGrammar (grammar) { + return grammar.split('\n') + .map(line => + (line.split('::=') + .map(side => + (side.split('|') + .map(dis => + (dis.split(' ') + .map(word => + (word.includes('"') ? 
word.split('').reverse().join('') : word)) + .reverse().join(' ')) + ).join('|'))).join('::= '))).join('\n') +} + +const leftCql = ` + query ::= query whitespace connective whitespace sub_query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= phrase whitespace word | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= word escaped_char | word regular_char | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] +` + +const rightCql = reverseGrammar(leftCql) + +function simplify (ast) { + switch (ast.type) { + case 'query': { + const children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) + return children.length > 1 ? 
children : children[0] + } + case 'connective': + return ast.text + case 'sub_query': + return simplify(ast.children.find(child => child.type.includes('query'))) + case 'atomic_query': + return ast.children.map(child => simplify(child)) + case 'scope': + return simplify(ast.children.find(child => child.type.includes('scope_term'))) + case 'relation': + return simplify(ast.children.find(child => child.type.includes('relation_term'))) + case 'scope_term': + return ast.text + case 'relation_term': + return ast.text + case 'quoted_term': + return simplify(ast.children.find(child => child.type.includes('phrase'))) + case 'phrase': { + const word = ast.children.find(child => child.type === 'word') + const phrase = ast.children.find(child => child.type === 'phrase') + return [simplify(word)].concat(phrase ? simplify(phrase) : []) + } + case 'word': + return ast.text + default: + break + } +} + +function reverseString (string) { + return string.split('').reverse().join('') +} + +function reverseAST (tree) { + if (!tree) return null + tree.text = reverseString(tree.text) + tree.children = tree.children.map(child => reverseAST(child)).reverse() + return tree +} + +const rightCqlParser = new Grammars.W3C.Parser(rightCql) + +// we want to associate operators to the left, but we have a right parser. 
+// so: reverse the grammar and the input string, then reverse the output +function parseRight (string, parser) { + return reverseAST(parser.getAST(reverseString(string))) +} +function parseWithRightCql (string) { + return parseRight(string, rightCqlParser) +} + +function parsedASTtoNestedArray (ast) { + if (!ast.type.includes('query')) { + return reverseString(ast.text) + } + + const childTypes = [ + 'atomic_query', 'sub_query', 'query', 'connective', + 'scope', 'relation', 'quoted_term' + ] + + const children = ast.children + .filter(child => childTypes.includes(child.type)) + .map(child => parsedASTtoNestedArray(child)) + + if (children.length === 1) { + return children[0] + } + + return children +} + +// we need to reverse the error message since `parseWithRightCql` doesn't +function displayParsed (string) { + const parsed = parseWithRightCql(string) + if (!parsed) return {} + if (parsed.errors.length) { + return { + error: parsed.errors.map(error => + `Parsing error likely near end of "${reverseString(error.token.rest)}"` + ).join('\n') + } + } + return { parsed: parsedASTtoNestedArray(parsed) } +} + +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js new file mode 100644 index 00000000..0f10f9bf --- /dev/null +++ b/lib/elasticsearch/cql_query_builder.js @@ -0,0 +1,346 @@ +const { parseWithRightCql } = require('./cql_grammar') +const { indexMapping } = require('./cql/index-mapping') +const ElasticQueryBuilder = require('./elastic-query-builder') + +function buildEsQuery (cqlQuery, request = null) { + const filterQuery = buildFilterQuery(request) + return { + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery.trim()) + ) + ], + ...filterQuery + } + } +} + +function buildFilterQuery (request) { + if (!request) return {} + const queryJson = 
ElasticQueryBuilder.forApiRequest(request).query.toJson() + if (queryJson.bool && queryJson.bool.filter) { + return { filter: queryJson.bool.filter } + } + return {} +} + +/** + this is mostly there but needs to handle exact strings + */ + +function buildEsQueryFromTree (tree) { + switch (tree.type) { + case 'query': { + const queries = tree.children.filter(child => child.type.includes('query')) + const connectives = tree.children.filter(child => child.type === 'connective') + if (connectives.length) { + return buildBoolean(connectives[0].text, queries) + } + return buildEsQueryFromTree(queries[0]) + } + case 'sub_query': { + const query = tree.children.filter(child => child.type.includes('query'))[0] + return buildEsQueryFromTree(query) + } + case 'atomic_query': { + return buildAtomic(atomicQueryParams(tree)) + } + default: + break + } +} + +function buildBoolean (operator, queries) { + if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries) + const esOperator = operator === 'AND' ? 'must' : 'should' + return { + bool: { + [esOperator]: queries.map(query => buildEsQueryFromTree(query)) + } + } +} + +function buildNegation (queries) { + return { + bool: { + must: [buildEsQueryFromTree(queries[0])], + must_not: [buildEsQueryFromTree(queries[1])] + } + } +} + +/** + A convenience method that collects the scope, relation, the full query (i.e term), and + all the separate words in the query (i.e. the terms) + */ +function atomicQueryParams (atomicQuery) { + return { + scope: atomicQuery.children.find(child => child.type === 'scope').text.trim(), + relation: atomicQuery.children.find(child => child.type === 'relation').text.trim(), + term: findTopPhrase(atomicQuery), + terms: findTopWords(atomicQuery) + } +} + +/** + Find the highest (i.e. 
most inclusive) phrase node and return its text + Ex: if the query was keyword="Hamlet Shakespeare", there will be phrase nodes + for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare + */ +function findTopPhrase (tree) { + if (tree.type === 'phrase') return tree.text + const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) + return topPhrases.length ? topPhrases[0] : null +} + +/** + Return a list of all the words that aren't fragments of larger words + E.g. Hamlet Shakespeare => [Hamlet, Shakespeare], and doesn't include the text + of word nodes for H, Ha, Ham, etc... + */ +function findTopWords (tree) { + if (tree.type === 'word') return [tree.text] + return tree.children.map(child => findTopWords(child)).flat() +} + +/** + For an object where the values are arrays, apply the given filter and map + to each of the arrays. + */ +function nestedFilterAndMap (obj, filter, map) { + return Object.assign({}, + ...(Object.entries(obj) + .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) + ) + ) +} + +/** + Return truthy value if and only if one of the values is a non-empty array + */ +function hasFields (obj) { + return Object.values(obj).some(arr => arr.length) +} + +/** + build atomic: + - identify the scope fields that match the term + - separate out into main, items, holdings + - boolean(main, items, holdings) + - items/holds = nested(items/holdings, main) + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term 
fields + */ + +function buildAtomic ({ scope, relation, terms, term }) { + const allFields = nestedFilterAndMap( + indexMapping[scope], + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? field : field.field) + ) + + const bibFields = nestedFilterAndMap( + allFields, + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), + field => field + ) + + const itemFields = nestedFilterAndMap( + allFields, + (field) => field.startsWith('items'), + field => field + ) + + const holdingsFields = nestedFilterAndMap( + allFields, + (field) => field.startsWith('holdings'), + field => field + ) + + return { + bool: { + should: [ + buildAtomicMain({ fields: bibFields, relation, terms, term }), + (hasFields(itemFields) && buildAtomicNested('items', { fields: itemFields, relation, terms, term })), + (hasFields(holdingsFields) && buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term })) + ].filter(x => x) + } + } +} + +function buildAtomicNested (name, { fields, relation, terms, term }) { + return { + nested: { + path: name, + query: buildAtomicMain({ fields, relation, terms, term }) + } + } +} + +/** + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ +function buildAtomicMain ({ fields, relation, terms, term }) { + switch (relation) { + case 'any': + case 'all': + return anyAllQueries({ fields, relation, terms }) + case '=': + case '==': + case 'adj': + return adjEqQueries({ fields, relation, 
terms, term }) + case '>': + case '<': + case '<=': + case '>=': + case 'within': + case 'encloses': + return dateQueries({ fields, relation, terms, term }) + default: + break + } +} + +function anyAllQueries ({ fields, relation, terms }) { + if (!['any', 'all'].includes(relation)) { return null } + const operator = (relation === 'any' ? 'should' : 'must') + return { + bool: { + [operator]: terms.map(term => matchTermWithFields(fields, term, 'cross_fields')) + } + } +} + +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', '==', 'adj'].includes(relation)) { return null } + const type = (relation === '==') ? 'exact' : 'phrase' + return matchTermWithFields(fields, term, type) +} + +// depending on the type of cql query supplied by the user, +// we may need to modify the es query from the type indicated by the index +// mapping. +// e.g. in case the user indicates a prefix query, all `term` queries should be +// mapped to `prefix` queries +// X represents field types that should be excluded e.g. for exact matching, +// exclude regular fields and use matching `exact_fields` instead +const esQueryMappingByCqlQueryType = { + exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, + prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, + basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } +} + +// used to turn the above table inside out, e.g. +// in case of queryType = `prefix`, +// will gather together, for a given set of fields, all the query types that +// need to be included under `selector` +// so e.g. 
`term`, 'prefix', and `exact_fields` fields all need to be included +// in the `prefix` matcher, since they are all mapped to `prefix` in this case +const selectFields = (queryType, fields) => (selector) => { + return Object.entries(fields) + .filter(([fieldType, fieldNames]) => { + return esQueryMappingByCqlQueryType[queryType][fieldType] === selector + }) + .map(([fieldType, fieldNames]) => fieldNames) + .flat() +} + +function matchTermWithFields (fields, term, type) { + const queryType = term[0] === '^' ? 'prefix' : (type === 'exact' ? 'exact' : 'basic') + if (term[0] === '^') term = term.slice(1) + + const selector = selectFields(queryType, fields) + + const queries = [ + ...multiMatch(selector('multi_match'), term, type), + ...(selector('term').map(termField => termQuery(termField, term))), + ...(selector('prefix').map(prefixField => prefixQuery(prefixField, term))) + ] + + return { + bool: { + should: queries + } + } +} + +function dateQueries ({ fields, relation, terms, term }) { + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return null } + let range + switch (relation) { + case '<': + range = { lt: terms[0] } + break + case '>': + range = { gt: terms[0] } + break + case '>=': + range = { gte: terms[0] } + break + case '<=': + range = { lte: terms[0] } + break + case 'encloses': + range = { gt: terms[0], lt: terms[1] } + break + case 'within': + range = { gte: terms[0], lte: terms[1] } + break + default: + break + } + + return { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range + } + } + } + } +} + +function termQuery (field, term) { + return { term: { [field]: term } } +} + +function prefixQuery (field, term) { + return { prefix: { [field]: term } } +} + +function multiMatch (fields, term, type) { + if (!fields || !fields.length) return [] + + return [{ + multi_match: { + query: term, + fields, + type + } + }] +} + +module.exports = { + buildEsQuery, + buildEsQueryFromTree, + 
buildBoolean, + buildAtomic, + buildAtomicMain, + nestedFilterAndMap, + selectFields, + indexMapping +} diff --git a/lib/elasticsearch/elastic-body-builder.js b/lib/elasticsearch/elastic-body-builder.js new file mode 100644 index 00000000..aa5c45a9 --- /dev/null +++ b/lib/elasticsearch/elastic-body-builder.js @@ -0,0 +1,199 @@ +const { EXCLUDE_FIELDS, ITEM_FILTER_AGGREGATIONS, SORT_FIELDS, AGGREGATIONS_SPEC } = require('./config') +const { innerHits, itemsQueryContext, itemsFilterContext } = require('./elastic-query-filter-builder') +const ApiRequest = require('../api-request') +const ElasticQueryBuilder = require('../elasticsearch/elastic-query-builder') + +const bodyForFindByUri = function (recapBarcodesByStatus, params) { + const paramsIncludesItemLevelFiltering = Object.keys(params) + .filter((param) => param.startsWith('item_')).length > 0 + + const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering + + const excludes = returnAllItems ? EXCLUDE_FIELDS.filter((field) => field !== '*_sort') : EXCLUDE_FIELDS.concat(['items']) + + const aggregations = params.include_item_aggregations + ? { aggregations: ITEM_FILTER_AGGREGATIONS } + : {} + + const itemsOptions = { + size: params.items_size, + from: params.items_from, + merge_checkin_card_items: params.merge_checkin_card_items, + query: { + volume: params.item_volume, + date: params.item_date, + format: params.item_format, + location: params.item_location, + status: params.item_status, + itemUri: params.itemUri + }, + unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] + } + + const queryFilter = { filter: !returnAllItems ? [innerHits(itemsOptions)] : [] } + + const body = { + _source: { + excludes + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: params.uri + } + } + ], + ...queryFilter + } + }, + ...aggregations + } + + return body +} + +/** + * Given GET params, returns a plainobject suitable for use in a ES query. 
 + * + * @param {object} params - A hash of request params including `filters`, + * `search_scope`, `q` + * + * @return {object} ES query object suitable to be POST'd to ES endpoint + */ +const buildElasticQuery = function (params, options = {}) { + const request = ApiRequest.fromParams(params) + + const builder = ElasticQueryBuilder.forApiRequest(request, options) + return builder.query.toJson() +} + +/** + * Given GET params, returns a plainobject with `from`, `size`, `query`, + * `sort`, and any other params necessary to perform the ES query based + * on the GET params. + * + * @return {object} An object that can be posted directly to ES + */ +const buildElasticBody = function (params, options = {}) { + // Apply sort: + let direction + let field + + if (params.sort === 'relevance') { + field = '_score' + direction = 'desc' + } else { + field = SORT_FIELDS[params.sort].field || params.sort + direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection + } + + const from = params.per_page && params.page ? { from: params.per_page * (params.page - 1) } : {} + const size = params.per_page ? { size: params.per_page } : {} + + return { + ...from, + ...size, + query: buildElasticQuery(params, options), + sort: [{ [field]: direction }, { uri: 'asc' }] + } +} + +const bodyForSearch = function (params) { + const itemsOptions = { merge_checkin_card_items: params.merge_checkin_card_items } + + const body = Object.assign( + buildElasticBody(params, { items: itemsOptions }), + { + _source: { + excludes: EXCLUDE_FIELDS.concat(['items']) + } + } + ) + + return body +} + +const buildElasticAggregationsBody = (params, aggregateProps) => { + // Add an `aggregations` entry to the ES body describing the aggregations + // we want. Set the `size` property to per_page (default 50) for each. 
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size + const aggregations = aggregateProps.reduce((aggs, prop) => { + aggs[prop] = AGGREGATIONS_SPEC[prop] + // Only set size for terms aggs for now: + if (aggs[prop].terms) { + aggs[prop].terms.size = params.per_page + } + return aggs + }, {}) + + return Object.assign( + buildElasticBody(params), + { size: 0, aggregations } + ) +} + +/** +* Given a params hash, returns an array of ES queries for fetching relevant aggregations. +*/ +const aggregationQueriesForParams = (params) => { + // Build the complete set of distinct aggregation queries we need to run + // depending on active filters. We want: + // - one agg representing the counts for all properties _not_ used in filter + // - one agg each for each property that is used in a filter, but counts should exclude that filter + + // Build the standard aggregation: + const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) + // Aggregate on all properties that aren't involved in filters: + .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) + const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] + + // Now append all property-specific aggregation queries (one for each + // distinct property used in a filter): + return queries.concat( + Object.entries(params.filters || {}) + // Only consider filters that are also aggregations: + .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) + .map(([prop, values]) => { + const aggFilters = structuredClone(params.filters) + // For this aggregation, don't filter on namesake property: + delete aggFilters[prop] + + // Build query for single aggregation: + const modifiedParams = Object.assign({}, params, { filters: aggFilters }) + return buildElasticAggregationsBody(modifiedParams, [prop]) + }) + ) +} + +const bodyForAggregation = (params) => { + 
const aggregations = {} + aggregations[params.field] = AGGREGATIONS_SPEC[params.field] + + // If it's a terms agg, we can apply per_page: + if (aggregations[params.field].terms) { + aggregations[params.field].terms.size = params.per_page + } + + return Object.assign( + buildElasticBody(params), + { + size: 0, + aggregations + } + ) +} + +module.exports = { + bodyForFindByUri, + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody, + bodyForSearch, + buildElasticAggregationsBody, + aggregationQueriesForParams, + bodyForAggregation +} diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 0b4452bc..81dc034b 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -2,6 +2,7 @@ const ElasticQuery = require('./elastic-query') const ApiRequest = require('../api-request') const { escapeQuery, namedQuery, prefixMatch, termMatch, phraseMatch } = require('./utils') const { regexEscape } = require('../util') +const { innerHits } = require('./elastic-query-filter-builder') const { FILTER_CONFIG, SEARCH_SCOPES } = require('./config') @@ -11,7 +12,7 @@ const POPULARITY_BOOSTS = [ ] class ElasticQueryBuilder { - constructor (apiRequest) { + constructor (apiRequest, options = {}) { this.request = apiRequest this.query = new ElasticQuery() @@ -36,6 +37,8 @@ class ElasticQueryBuilder { case 'callnumber': this.buildCallnumberQuery() break + case 'cql': + break case 'all': default: this.buildAllQuery() @@ -44,6 +47,10 @@ class ElasticQueryBuilder { // Add user filters: this.applyFilters() + if (options.items) { + this.query.addFilter(innerHits(options.items)) + } + // if a list of ids is specified, return those ids this.applyMultipleIdMatch() @@ -743,8 +750,8 @@ class ElasticQueryBuilder { /** * Create a ElasticQueryBuilder for given ApiRequest instance */ - static forApiRequest (request) { - return new ElasticQueryBuilder(request) + static forApiRequest 
(request, options = {}) { + return new ElasticQueryBuilder(request, options) } } diff --git a/lib/elasticsearch/elastic-query-filter-builder.js b/lib/elasticsearch/elastic-query-filter-builder.js index e69de29b..5149cde8 100644 --- a/lib/elasticsearch/elastic-query-filter-builder.js +++ b/lib/elasticsearch/elastic-query-filter-builder.js @@ -0,0 +1,253 @@ +const { esRangeValue } = require('../utils/resource-helpers') + +/** + * Given an object containing filters, + * returns content of the ES query filter context + * + * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] + * @returns {object} + */ +const itemsFilterContext = (options) => { + if (!options.query) return {} + + const filterHandlers = { + volume: (volumes) => { + return { + range: { + 'items.volumeRange': esRangeValue(volumes) + } + } + }, + date: (dates) => { + return { + range: { + 'items.dateRange': esRangeValue(dates) + } + } + }, + format: (formats) => { + return { + terms: { + 'items.formatLiteral': formats + } + } + }, + location: (locations) => { + return { + terms: { + 'items.holdingLocation.id': locations + } + } + }, + status: (statuses) => { + // Determine if all possible ReCAP statuses were selected: + const selectedRecapStatuses = recapStatuses(statuses) + + if (selectedRecapStatuses.length === 1 && + Array.isArray(options.unavailable_recap_barcodes) && + options.unavailable_recap_barcodes.length > 0) { + // There are known unavailable ReCAP items, so build a complicated + // filter clause with appropriate barcode overrides: + return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) + } else { + // If there are no known unavailable ReCAP items, just do a straight + // status match: + return { + terms: { + 'items.status.id': statuses + } + } + } + }, + itemUri: (uri) => { + return { term: { 'items.uri': uri } } + } + } + + const filters = Object.keys(options.query).map((filter) => { + const value = 
options.query[filter]
+    const handler = filterHandlers[filter]
+    return value && handler ? handler(value) : null
+  }).filter((x) => x)
+
+  return filters.length
+    ? { filter: filters }
+    : {}
+}
+
+/**
+ * Given an array of status ids (e.g. "status:a", "status:na") returns the
+ * subset of statuses that are relevant in ReCAP
+ */
+const recapStatuses = (statuses) => {
+  return statuses
+    .filter((status) => ['status:a', 'status:na'].includes(status))
+}
+
+/**
+ * Builds a big complicated ES filter to allow us to filter items by status,
+ * but override the indexed status for ReCAP items with statuses retrieved
+ * from SCSB. This corrects for the fact that ReCAP item statuses tend to be
+ * wrong in the ES index:
+ *  - partner items are indexed as Available and remain thus forever
+ *  - NYPL item statuses _should_ equal SCSB status, but the mechanism
+ *    for keeping them synced isn't perfect and operates on a delay
+ *
+ * @param {string[]} statuses - An array of statuses to filter on
+ * @param {string[]} unavailableRecapBarcodes - An array of item barcodes
+ * known to be unavailable
+ *
+ * Returns an ES filter that matches the desired statuses, but also uses
+ * the known unavailable items to override indexed item statuses for ReCAP
+ * items (because ReCAP is the authority for status of off-site items). 
+ * Essentially, the criteria for matching an item is:
+ *
+ * - if on-site (non-ReCAP):
+ *   - has a matching indexed status
+ * - if off-site:
+ *   - if filtering on status:na
+ *     - item barcode must be in unavailableRecapBarcodes
+ *   - if filtering on status:a:
+ *     - item barcode must NOT be in unavailableRecapBarcodes
+ */
+const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => {
+  // First, let's set up some common clauses:
+
+  // Item is in ReCAP:
+  const itemIsRecapClause = {
+    regexp: { 'items.holdingLocation.id': 'loc:rc.*' }
+  }
+  // Item's indexed status matches one of the filtered statuses:
+  const itemHasIndexedStatusClause = {
+    terms: { 'items.status.id': statuses }
+  }
+  // Item is marked Unavailable in SCSB:
+  const itemIsUnavailableInRecapClause = {
+    script: {
+      script: {
+        inline: 'doc[\'items.idBarcode\'].value == null || ' +
+          'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])',
+        lang: 'painless',
+        params: { unavailableRecapBarcodes }
+      }
+    }
+  }
+  // This function is only called if `statuses` param contains a single
+  // ReCAP-relevant status (i.e. status:a or status:na), so determine which
+  // ReCAP status to use:
+  const selectedRecapStatus = recapStatuses(statuses).shift()
+  // Item's ReCAP status agrees with filter:
+  const itemRecapStatusAgreesWithFilterClause =
+    selectedRecapStatus === 'status:na'
+      ? itemIsUnavailableInRecapClause
+      : { bool: { must_not: itemIsUnavailableInRecapClause } }
+
+  return {
+    bool: {
+      should: [
+        // Either 1) item is on-site and has correctly indexed status:
+        {
+          bool: {
+            must: [
+              // Item is on-site (i.e. not recap):
+              { bool: { must_not: itemIsRecapClause } },
+              // Item indexed status matches filter:
+              itemHasIndexedStatusClause
+            ]
+          }
+        },
+        // Or 2) item is off-site and has a scsb status that agrees with the
+        // filter (e.g. 
if filtering on status:na, scsb marks the barcode as + // 'Not Available') + { + bool: { + must: [ + // Item is off-site: + JSON.parse(JSON.stringify(itemIsRecapClause)), + // Item is not marked unavailable + itemRecapStatusAgreesWithFilterClause + ] + } + } + ] + } + } +} + +/** + * Given an object containing query options, + * returns content of the ES query context + * + * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one + * that matters right now + * @returns {object} + */ +const itemsQueryContext = (options) => { + const excludeClauses = [] + + if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) + + return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } +} + +const innerHits = (_options = {}) => { + const options = Object.assign({ + size: process.env.SEARCH_ITEMS_SIZE || 200, + from: 0, + merge_checkin_card_items: true + }, _options) + + // If there is any item query at all, run an additional inner_hits query + // to retrieve the total number of items without filtering: + const itemsQuery = { + bool: Object.assign( + itemsQueryContext(options), + itemsFilterContext(options) + ) + } + + const allItemsQuery = itemsQuery.bool.filter + ? 
[{ + nested: { + path: 'items', + query: { + bool: { + must_not: [{ exists: { field: 'items.electronicLocator' } }] + } + }, + inner_hits: { name: 'allItems' } + } + }] + : [] + + const wrappedItemsQuery = { + bool: { + should: [ + { + nested: { + path: 'items', + query: itemsQuery, + inner_hits: { + sort: [{ 'items.enumerationChronology_sort': 'desc' }], + size: options.size, + from: options.from, + name: 'items' + } + } + }, + // Add a catch-all to ensure we return the bib document even when + // numItems=0 or applied item filters exclude all items: + { match_all: {} }, + ...allItemsQuery + ] + } + } + + return wrappedItemsQuery +} + +module.exports = { + innerHits, + itemsQueryContext, + itemsFilterContext +} diff --git a/lib/resources.js b/lib/resources.js index 3c9cccf6..36b53f43 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -1,4 +1,3 @@ -const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') const scsbClient = require('./scsb-client') const ResourceResultsSerializer = require('./jsonld_serializers.js').ResourceResultsSerializer @@ -6,97 +5,38 @@ const ResourceSerializer = require('./jsonld_serializers.js').ResourceSerializer const AggregationsSerializer = require('./jsonld_serializers.js').AggregationsSerializer const AggregationSerializer = require('./jsonld_serializers.js').AggregationSerializer const ItemResultsSerializer = require('./jsonld_serializers.js').ItemResultsSerializer -const LocationLabelUpdater = require('./location_label_updater') const AnnotatedMarcSerializer = require('./annotated-marc-serializer') const MarcSerializer = require('./marc-serializer') const { makeNyplDataApiClient } = require('./data-api-client') const { IndexSearchError, IndexConnectionError } = require('./errors') const ResponseMassager = require('./response_massager.js') -const AvailableDeliveryLocationTypes = require('./available_delivery_location_types') -const { parseParams, deepValue } = require('../lib/util') +const { 
parseParams } = require('../lib/util') -const ApiRequest = require('./api-request') -const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') -const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') +const { AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') const Item = require('./models/Item.js') +const { + esRangeValue, + parseSearchParams, + nyplSourceAndId, + itemsByFilter, + mergeAggregationsResponses, + lookupPatronType, + makeRelevanceReport +} = require('./utils/resource-helpers') + +const { + bodyForFindByUri, + bodyForSearch, + aggregationQueriesForParams, + bodyForAggregation +} = require('./elasticsearch/elastic-body-builder') const RESOURCES_INDEX = process.env.RESOURCES_INDEX -const ITEM_FILTER_AGGREGATIONS = { - item_location: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.holdingLocation_packed' } } } }, - item_status: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.status_packed' } } } }, - item_format: { nested: { path: 'items' }, aggs: { _nested: { terms: { size: 100, field: 'items.formatLiteral' } } } } -} - -// Configure sort fields: -const SORT_FIELDS = { - title: { - initialDirection: 'asc', - field: 'title_sort' - }, - date: { - initialDirection: 'desc', - field: 'dateStartYear' - }, - creator: { - initialDirection: 'asc', - field: 'creator_sort' - }, - callnumber: { - initialDirection: 'asc', - field: 'shelfMark.keywordLowercased' - }, - relevance: {} -} - -// The following fields can be excluded from ES responses because we don't pass them to client: -const EXCLUDE_FIELDS = [ - 'uris', - '*_packed', - '*_sort', - 'items.*_packed', - 'contentsTitle', - 'suppressed', - // Hide contributor and creator transformed fields: - '*WithoutDates', - '*Normalized' -] - -// Configure controller-wide parameter parsing: -const parseSearchParams = function (params, overrideParams = {}) { - 
return parseParams(params, { - q: { type: 'string' }, - page: { type: 'int', default: 1 }, - per_page: { type: 'int', default: 50, range: [0, 100] }, - field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, - sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, - sort_direction: { type: 'string', range: ['asc', 'desc'] }, - search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, - filters: { type: 'hash', fields: FILTER_CONFIG }, - items_size: { type: 'int', default: 100, range: [0, 200] }, - items_from: { type: 'int', default: 0 }, - callnumber: { type: 'string' }, - standard_number: { type: 'string' }, - contributor: { type: 'string' }, - title: { type: 'string' }, - subject: { type: 'string' }, - subject_prefix: { type: 'string' }, - isbn: { type: 'string' }, - issn: { type: 'string' }, - lccn: { type: 'string' }, - oclc: { type: 'string' }, - role: { type: 'string' }, - merge_checkin_card_items: { type: 'boolean', default: true }, - include_item_aggregations: { type: 'boolean', default: true }, - ids: { type: 'string-list' }, - ...overrideParams - }) -} - // These are the handlers made available to the router: module.exports = function (app, _private = null) { app.resources = {} @@ -120,11 +60,7 @@ module.exports = function (app, _private = null) { }) // Validate uri: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? 
{} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + await nyplSourceAndId(params.uri) // If we need to return itemAggregations or filter on item_status, // then we need to pre-retrieve SCSB item statuses to incorporate them into @@ -133,177 +69,71 @@ module.exports = function (app, _private = null) { // We only need to retrieve scsb statuses if building item aggs or // filtering on status: const retrieveScsbStatuses = params.include_item_aggregations || params.item_status - const scsbStatusLookup = retrieveScsbStatuses - ? scsbClient.getBarcodesByStatusForBnum(params.uri) - .catch((e) => { - app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) - return {} - }) - : Promise.resolve({}) - - return scsbStatusLookup - .then((recapBarcodesByStatus) => { - // Establish base query: - let body = { - _source: { - excludes: EXCLUDE_FIELDS - }, - size: 1, - query: { - bool: { - must: [ - { - term: { - uri: params.uri - } - } - ] - } - } - } - const paramsIncludesItemLevelFiltering = Object.keys(params) - .filter((param) => param.startsWith('item_')).length > 0 - const returnAllItems = params.all_items && !paramsIncludesItemLevelFiltering - if (returnAllItems) { - body._source.excludes = EXCLUDE_FIELDS.filter((field) => field !== '*_sort') - } else { - // No specific item requested, so add pagination and matching params: - const itemsOptions = { - size: params.items_size, - from: params.items_from, - merge_checkin_card_items: params.merge_checkin_card_items, - query: { - volume: params.item_volume, - date: params.item_date, - format: params.item_format, - location: params.item_location, - status: params.item_status, - itemUri: params.itemUri - }, - unavailable_recap_barcodes: recapBarcodesByStatus['Not Available'] - } - body = addInnerHits(body, itemsOptions) - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - } - if (params.include_item_aggregations) { - 
body.aggregations = ITEM_FILTER_AGGREGATIONS - } - app.logger.debug('Resources#findByUri', body) - return app.esClient.search(body) - .then((resp) => { - // Mindfully throw errors for known issues: - if (!resp || !resp.hits) { - throw new Error('Error connecting to index') - } else if (resp?.hits?.total?.value === 0) { - throw new errors.NotFoundError(`Record not found: ${params.uri}`) - } else { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) - } - }).then((resp) => { - const hitsAndItemAggregations = resp.hits.hits[0]._source - hitsAndItemAggregations.itemAggregations = resp.aggregations - return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) - }) - }) + let recapBarcodesByStatus = {} + if (retrieveScsbStatuses) { + try { + recapBarcodesByStatus = await scsbClient.getBarcodesByStatusForBnum(params.uri) + } catch (e) { + app.logger.error(`Error connecting to SCSB; Unable to lookup barcodes for bib ${params.uri}`, e) + } + } + + const body = bodyForFindByUri(recapBarcodesByStatus, params) + app.logger.debug('Resources#findByUri', body) + let resp = await app.esClient.search(body) + // Mindfully throw errors for known issues: + if (!resp || !resp.hits) { + throw new Error('Error connecting to index') + } else if (resp?.hits?.total?.value === 0) { + throw new errors.NotFoundError(`Record not found: ${params.uri}`) + } else { + const massagedResponse = new ResponseMassager(resp) + try { + resp = await massagedResponse.massagedResponse(request, { queryRecapCustomerCode: !!params.itemUri, recapBarcodesByStatus }) + } catch (e) { + // If error hitting HTC, just return response un-modified: + } + const hitsAndItemAggregations = resp.hits.hits[0]._source + hitsAndItemAggregations.itemAggregations = 
resp.aggregations + return ResourceSerializer.serialize(hitsAndItemAggregations, Object.assign(opts, { root: true })) + } } // Get a single raw annotated-marc resource: app.resources.annotatedMarc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? {} - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + const { id, nyplSource } = await nyplSourceAndId(params.uri) app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) - return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) - .then((resp) => { - // need to check that the query actually found an entry - if (!resp.data) { - throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) - } else { - return resp.data - } - }) - .then(AnnotatedMarcSerializer.serialize) + const resp = await makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) + // need to check that the query actually found an entry + if (!resp.data) { + throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) + } + + return AnnotatedMarcSerializer.serialize(resp.data) } // Get a single raw marc: app.resources.marc = async function (params, opts) { // Convert discovery id to nyplSource and un-prefixed id: - const nyplSourceMapper = await NyplSourceMapper.instance() - const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri) ?? 
{} - - if (!id || !nyplSource) { - throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`) - } + const { id, nyplSource } = await nyplSourceAndId(params.uri) - app.logger.debug('Resources#marc', { id, nyplSource }) - - return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) - .then((resp) => { - // need to check that the query actually found an entry - if (!resp.data) { - throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) - } else { - return resp.data - } - }) - .then(MarcSerializer.serialize) - } + app.logger.debug('Resources#annotatedMarc', { id, nyplSource }) - function itemsByFilter (filter, opts) { - opts = Object.assign({ - _source: null - }, opts) - - // Build ES query body: - const body = { - query: { - nested: { - path: 'items', - score_mode: 'avg', - query: { - constant_score: { - filter - } - } - } - } + const resp = await makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`) + // need to check that the query actually found an entry + if (!resp.data) { + throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`) } - if (opts._source) body._source = opts._source - - app.logger.debug('Resources#itemsByFilter', body) - return app.esClient.search(body) - .then((resp) => { - if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) - resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() - // Convert this ES bibs response into an array of flattened items: - return resp.hits.hits - .map((doc) => doc._source) - // Reduce to a flat array of items - .reduce((a, bib) => { - return a.concat(bib.items) - // Let's affix that bnum into the item's identifiers so we know where it came from: - .map((i) => { - return Object.assign(i, { identifier: [`urn:bnum:${bib.uri}`].concat(i.identifier) }) - }) - }, []) - }) + + return MarcSerializer.serialize(resp.data) } // Get deliveryLocations for given resource(s) - app.resources.deliveryLocationsByBarcode 
= function (params, opts) { + app.resources.deliveryLocationsByBarcode = async function (params, opts) { params = parseParams(params, { barcodes: { type: 'string', repeatable: true }, patronId: { type: 'string' } @@ -312,471 +142,70 @@ module.exports = function (app, _private = null) { const identifierValues = barcodes.map((barcode) => `urn:barcode:${barcode}`) - // Create promise to resolve deliveryLocationTypes by patron type: - const lookupPatronType = AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) - .catch((e) => { - throw new errors.InvalidParameterError('Invalid patronId') - }) - // Create promise to resolve items: - const fetchItems = itemsByFilter( - { terms: { 'items.identifier': identifierValues } }, - { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } - - // Filter out any items (multi item bib) that don't match one of the queriered barcodes: - ).then((items) => { - return items.filter((item) => { - return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 - }) - }) + const fetchItems = itemsByFilter(identifierValues, app) // Run both item fetch and patron fetch in parallel: - return Promise.all([fetchItems, lookupPatronType]) - .then((resp) => { - // The resolved values of Promise.all are strictly ordered based on original array of promises - const items = resp[0] - const scholarRoom = resp[1] - - // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: - return Promise.all(items.map(async (item) => Item.withDeliveryLocationsByBarcode(item, scholarRoom))) - .catch((e) => { - // An error here is likely an HTC API outage - // Let's return items unmodified: - // - app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) - return items - }) - }) - .then((items) => 
ItemResultsSerializer.serialize(items, opts)) - } - - /** - * Given a ES search body, returns same object modified to include the - * additional query necessary to limit (and paginate through) items - * - * @param {object} body - An ES query object (suitable for POSTing to ES - * @param {object} options - An object optionally defining `size` and `from` - * for limiting and paginating through items - */ - const addInnerHits = (body, _options = {}) => { - const options = Object.assign({ - size: process.env.SEARCH_ITEMS_SIZE || 200, - from: 0, - merge_checkin_card_items: true - }, _options) - - // Make sure necessary structure exists: - if (!deepValue(body, 'query.bool') && !deepValue(body, 'query.function_score.query.bool')) { - body.query = { bool: {} } - } - - // The place to add the filter depends on the query built to this point: - const placeToAddFilter = (body.query.bool || body.query.function_score.query.bool) - // Initialize filter object if it doesn't already exist: - placeToAddFilter.filter = placeToAddFilter.filter || [] - // If filter object already exists, convert it to array: - if (!Array.isArray(placeToAddFilter.filter)) placeToAddFilter.filter = [placeToAddFilter.filter] - - const itemsQuery = { - bool: Object.assign( - itemsQueryContext(options), - itemsFilterContext(options) - ) + const [resp] = Promise.all([fetchItems, lookupPatronType]) + // The resolved values of Promise.all are strictly ordered based on original array of promises + let items = resp[0] + const scholarRoom = resp[1] + + // Use HTC API and nypl-core mappings to ammend ES response with deliveryLocations: + try { + items = await items.map(async (item) => Item.withDeliveryLocationsByBarcode(item, scholarRoom)) + } catch (e) { + // An error here is likely an HTC API outage + // Let's return items unmodified: + // + app.logger.info({ message: 'Caught (and ignoring) error mapping barcodes to recap customer codes', htcError: e.message }) + return items } - - const wrappedItemsQuery = { - 
bool: { - should: [ - { - nested: { - path: 'items', - query: itemsQuery, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: options.size, - from: options.from, - name: 'items' - } - } - }, - // Add a catch-all to ensure we return the bib document even when - // numItems=0 or applied item filters exclude all items: - { match_all: {} } - ] - } - } - placeToAddFilter.filter.push(wrappedItemsQuery) - - // If there is any item query at all, run an additional inner_hits query - // to retrieve the total number of items without filtering: - if (itemsQuery.bool.filter) { - wrappedItemsQuery.bool.should.push({ - nested: { - path: 'items', - query: { - bool: { - must_not: [{ exists: { field: 'items.electronicLocator' } }] - } - }, - inner_hits: { name: 'allItems' } - } - }) - } - - return body - } - - /** - * Given a range represented as an array, returns a corresponding ES range object - * - * @param {Array.} range - An array consisting of a single date or a pair of dates - * @returns {object} - */ - const esRangeValue = (range) => { - // the greater-than-equal value will always be the first value in the range array. 
- // depending on the number of values and their equality, we query using less-than-equal - // the second value, or just less-than the first value plus one - - // Treat case where range start equals range end same as case of single value: - if (range[0] === range[1]) range = range.slice(0, 1) - const rangeQuery = { - gte: range[0] - } - if (range.length === 2) { - // search on both range values - rangeQuery.lte = range[range.length - 1] - } else if (range.length === 1) { - // if there is just one range, query up until the next year - rangeQuery.lt = range[0] + 1 - } - return rangeQuery - } - - /** - * Given an object containing filters, - * returns content of the ES query filter context - * - * @param {object} options - An object with keys,value pairs of the form [filter_name]:[filter_value] - * @returns {object} - */ - const itemsFilterContext = (options) => { - if (!options.query) return {} - - const filterHandlers = { - volume: (volumes) => { - return { - range: { - 'items.volumeRange': esRangeValue(volumes) - } - } - }, - date: (dates) => { - return { - range: { - 'items.dateRange': esRangeValue(dates) - } - } - }, - format: (formats) => { - return { - terms: { - 'items.formatLiteral': formats - } - } - }, - location: (locations) => { - return { - terms: { - 'items.holdingLocation.id': locations - } - } - }, - status: (statuses) => { - // Determine if all possible ReCAP statuses were selected: - const selectedRecapStatuses = recapStatuses(statuses) - - if (selectedRecapStatuses.length === 1 && - Array.isArray(options.unavailable_recap_barcodes) && - options.unavailable_recap_barcodes.length > 0) { - // There are known unavailable ReCAP items, so build a complicated - // filter clause with appropriate barcode overrides: - return itemStatusFilterWithUnavailableRecapItems(statuses, options.unavailable_recap_barcodes) - } else { - // If there are no known unavailable ReCAP items, just do a straight - // status match: - return { - terms: { - 'items.status.id': 
statuses - } - } - } - }, - itemUri: (uri) => { - return { term: { 'items.uri': uri } } - } - } - - const filters = Object.keys(options.query).map((filter) => { - const value = options.query[filter] - const handler = filterHandlers[filter] - return value && handler ? handler(value) : null - }).filter((x) => x) - - return filters.length - ? { filter: filters } - : {} - } - - /** - * Given an array of status ids (e.g. "status:a", "status:na") returns the - * subset of statuses that are relevant in ReCAP - */ - const recapStatuses = (statuses) => { - return statuses - .filter((status) => ['status:a', 'status:na'].includes(status)) - } - - /** - * Builds a big complicated ES filter to allow us to filter items by status, - * but override the indexed status for ReCAP items with statuses retrieved - * from SCSB. This corrects for the fact that ReCAP item statuses tend to be - * wrong in the ES index: - * - partner items are indexed as Available and remain thus forever - * - NYPL item statuses _should_ equal SCSB status, but the mechanism - * for keeping them synced isn't perfect and operates on a delay - * - * @param {string[]} statuses - An array of statuses to filter on - * @param {string[]} unavailableRecapBarcodes - An array of item barcodes - * known to be unavailble - * - * Returns an ES filter that matches the desired statuses, but also uses - * the known unavailable items to override indexed item statuses for ReCAP - * items (because ReCAP is the authority for status of off-site items). 
- * Essentially, the criteria is for matching an item is: - * - * - if on-site (non-ReCAP): - * - has a matching indexed status - * - if off-site: - * - if filtering on status:na - * - item barcode must be in unavailableRecapBarcodes - * - if filtering on status:a: - * - item barcode must NOT be in unavailableRecapBarcodes - */ - const itemStatusFilterWithUnavailableRecapItems = (statuses, unavailableRecapBarcodes) => { - // First, let's set up some common clauses: - - // Item is in ReCAP: - const itemIsRecapClause = { - regexp: { 'items.holdingLocation.id': 'loc:rc.*' } - } - // Item's indexed status matches one of the filtered statuses: - const itemHasIndexedStatusClause = { - terms: { 'items.status.id': statuses } - } - // Item is marked Unavailable in SCSB: - const itemIsUnavailableInRecapClause = { - script: { - script: { - inline: 'doc[\'items.idBarcode\'].value == null || ' + - 'params.unavailableRecapBarcodes.contains(doc[\'items.idBarcode\'][0])', - lang: 'painless', - params: { unavailableRecapBarcodes } - } - } - } - // This function is only called if `statuses` param contains a single - // ReCAP-relevant status (i.e. status:a or status:na), so determine which - // ReCAP status to use: - const selectedRecapStatus = recapStatuses(statuses).shift() - // Item's ReCAP status agrees with filter: - const itemRecapStatusAgreesWithFilterClause = - selectedRecapStatus === 'status:na' - ? itemIsUnavailableInRecapClause - : { bool: { must_not: itemIsUnavailableInRecapClause } } - - return { - bool: { - should: [ - // Either 1) item is on-site and has correctly indexed status: - { - bool: { - must: [ - // Item is on-site (i.e. not recap): - { bool: { must_not: itemIsRecapClause } }, - // Item indexed status matches filter: - itemHasIndexedStatusClause - ] - } - }, - // Or 2) item is off-site and has a scsb status that agrees with the - // filter (e.g. 
if filtering on status:na, scsb marks the barcode as - // 'Not Available') - { - bool: { - must: [ - // Item is off-site: - JSON.parse(JSON.stringify(itemIsRecapClause)), - // Item is not marked unavailable - itemRecapStatusAgreesWithFilterClause - ] - } - } - ] - } - } - } - - /** - * Given an object containing query options, - * returns content of the ES query context - * - * @param {object} options - An object with request options. `merge_checkin_card_items` is the only one - * that matters right now - * @returns {object} - */ - const itemsQueryContext = (options) => { - const excludeClauses = [] - - if (!options.merge_checkin_card_items) excludeClauses.push({ term: { 'items.type': 'nypl:CheckinCardItem' } }) - - return excludeClauses.length ? { must_not: excludeClauses } : { must: { match_all: {} } } + items = await ItemResultsSerializer.serialize(items, opts) + return items } // Conduct a search across resources: - app.resources.search = function (params, opts, request) { + app.resources.search = async function (params, opts, request) { app.logger.debug('Unparsed params: ', params) params = parseSearchParams(params) app.logger.debug('Parsed params: ', params) - let body = buildElasticBody(params) - - // Strip unnecessary _source fields - body._source = { - excludes: EXCLUDE_FIELDS.concat(['items']) - } - - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + const body = bodyForSearch(params) app.logger.debug('Resources#search', RESOURCES_INDEX, body) - return app.esClient.search(body) - .then((resp) => { - const massagedResponse = new ResponseMassager(resp) - return massagedResponse.massagedResponse(request) - .catch((e) => { - // If error hitting HTC, just return response un-modified: - return resp - }) - .then((updatedResponse) => ResourceResultsSerializer.serialize(updatedResponse, opts)) - .then((resp) => { - // Build relevance report (for debugging): - const relevanceReport = resp.itemListElement - .map((r, ind) => { 
- const out = [] - out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) - if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) - if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) - out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) - if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) - return out.join(' ') - }) - app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) - - resp.debug = { - relevanceReport, - query: body - } - return resp - }) - }) - .catch((e) => { - // Wrap ES client errors or any downstream error - if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { - throw e // already a custom error - } - throw new IndexSearchError(`Error processing search: ${e.message || e}`) - }) - } + let resp - const buildElasticAggregationsBody = (params, aggregateProps) => { - // Add an `aggregations` entry to the ES body describing the aggretations - // we want. Set the `size` property to per_page (default 50) for each. 
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-size - const aggregations = aggregateProps.reduce((aggs, prop) => { - aggs[prop] = AGGREGATIONS_SPEC[prop] - // Only set size for terms aggs for now: - if (aggs[prop].terms) { - aggs[prop].terms.size = params.per_page + try { + resp = await app.esClient.search(body) + } catch (e) { + // Wrap ES client errors or any downstream error + if (e instanceof IndexSearchError || e instanceof IndexConnectionError) { + throw e // already a custom error } - return aggs - }, {}) + throw new IndexSearchError(`Error processing search: ${e.message || e}`) + } - const body = buildElasticBody(params) - body.size = 0 - body.aggregations = aggregations + try { + const massagedResponse = new ResponseMassager(resp) + resp = await massagedResponse.massagedResponse(request) + } catch (e) { // If error hitting HTC, just return response un-modified - return body - } + }
We want: - // - one agg representing the counts for all properties _not_ used in filter - // - one agg each for each property that is used in a filter, but counts should exclude that filter - - // Build the standard aggregation: - const unfilteredAggregationProps = Object.keys(AGGREGATIONS_SPEC) - // Aggregate on all properties that aren't involved in filters: - .filter((prop) => !Object.keys(params.filters || {}).includes(prop)) - const queries = [buildElasticAggregationsBody(params, unfilteredAggregationProps)] - - // Now append all property-specific aggregation queries (one for each - // distinct property used in a filter): - return queries.concat( - Object.entries(params.filters || {}) - // Only consider filters that are also aggregations: - .filter(([prop, values]) => Object.keys(AGGREGATIONS_SPEC).includes(prop)) - .map(([prop, values]) => { - const aggFilters = structuredClone(params.filters) - // For this aggregation, don't filter on namesake property: - delete aggFilters[prop] - - // Build query for single aggregation: - const modifiedParams = Object.assign({}, params, { filters: aggFilters }) - return buildElasticAggregationsBody(modifiedParams, [prop]) - }) - ) - } + resp = await ResourceResultsSerializer.serialize(resp, opts) - /** - * Given an array of ES aggregations responses (such as that returned from msearch) - **/ - const mergeAggregationsResponses = (responses) => { - // Filter out errored responses: - responses = responses.filter((resp) => resp.aggregations) - if (responses.length === 0) { - return {} - } - return { - // Use `hits` of last element, somewhat arbitrarily: - hits: responses[responses.length - 1].hits, - aggregations: responses - .reduce((allAggs, resp) => { - const respAggs = Object.entries(resp.aggregations) - // Build hash of response aggs, squashing _nested aggs: - .reduce((a, [field, _a]) => { - // If it's nested, it will be in our special '_nested' prop: - a[field] = _a._nested || _a - return a - }, {}) - // Add response aggs 
to combined aggs: - return Object.assign(allAggs, respAggs) - }, {}) + const relevanceReport = resp.itemListElement + .map(makeRelevanceReport(params)) + app.logger.debug(`Relevances:\n ${relevanceReport.join('\n')}`) + + resp.debug = { + relevanceReport, + query: body } + return resp } // Get all aggregations: @@ -800,7 +229,7 @@ module.exports = function (app, _private = null) { } // Get a single aggregation: - app.resources.aggregation = (params, opts) => { + app.resources.aggregation = async (params, opts) => { params = parseSearchParams(params, { per_page: { type: 'int', default: 50, range: [0, 1000] } }) @@ -808,91 +237,27 @@ module.exports = function (app, _private = null) { return Promise.reject(new Error('Invalid aggregation field')) } - const body = buildElasticBody(params) - - // We're fetching aggs, so specify 0 resource results: - body.size = 0 - - body.aggregations = {} - body.aggregations[params.field] = AGGREGATIONS_SPEC[params.field] - - // If it's a terms agg, we can apply per_page: - if (body.aggregations[params.field].terms) { - body.aggregations[params.field].terms.size = params.per_page - } - const serializationOpts = Object.assign(opts, { // This tells the serializer what fields are "packed" fields, which should be split apart packed_fields: ['materialType', 'language', 'carrierType', 'mediaType', 'issuance', 'status', 'owner'], root: true }) + const body = bodyForAggregation(params) + app.logger.debug('Resources#aggregation:', body) - return app.esClient.search(body) - .then((resp) => { - // If it's nested, it will be in our special '_nested' prop: - resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] - resp.id = params.field - return AggregationSerializer.serialize(resp, serializationOpts) - }) + + let resp = await app.esClient.search(body) + resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field] + resp.id = params.field + return AggregationSerializer.serialize(resp, 
serializationOpts) } // For unit testing, export private methods if second arg given: if (_private && typeof _private === 'object') { - _private.buildElasticBody = buildElasticBody - _private.buildElasticQuery = buildElasticQuery _private.parseSearchParams = parseSearchParams _private.esRangeValue = esRangeValue - _private.itemsFilterContext = itemsFilterContext - _private.itemsQueryContext = itemsQueryContext - _private.addInnerHits = addInnerHits _private.aggregationQueriesForParams = aggregationQueriesForParams _private.mergeAggregationsResponses = mergeAggregationsResponses } } - -/** - * Given GET params, returns a plainobject with `from`, `size`, `query`, - * `sort`, and any other params necessary to perform the ES query based - * on the GET params. - * - * @return {object} An object that can be posted directly to ES - */ -const buildElasticBody = function (params) { - const body = { - from: (params.per_page * (params.page - 1)), - size: params.per_page - } - - body.query = buildElasticQuery(params) - - // Apply sort: - let direction - let field - - if (params.sort === 'relevance') { - field = '_score' - direction = 'desc' - } else { - field = SORT_FIELDS[params.sort].field || params.sort - direction = params.sort_direction || SORT_FIELDS[params.sort].initialDirection - } - body.sort = [{ [field]: direction }, { uri: 'asc' }] - - return body -} - -/** - * Given GET params, returns a plainobject suitable for use in a ES query. 
- * - * @param {object} params - A hash of request params including `filters`, - * `search_scope`, `q` - * - * @return {object} ES query object suitable to be POST'd to ES endpoint - */ -const buildElasticQuery = function (params) { - const request = ApiRequest.fromParams(params) - - const builder = ElasticQueryBuilder.forApiRequest(request) - return builder.query.toJson() -} diff --git a/lib/utils/resource-helpers.js b/lib/utils/resource-helpers.js new file mode 100644 index 00000000..2af14f15 --- /dev/null +++ b/lib/utils/resource-helpers.js @@ -0,0 +1,175 @@ +const { parseParams } = require('../util') +const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC, SORT_FIELDS } = require('../elasticsearch/config') +const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper') +const errors = require('../errors') +const LocationLabelUpdater = require('../location_label_updater') +const AvailableDeliveryLocationTypes = require('../available_delivery_location_types') + +/** + * Given a range represented as an array, returns a corresponding ES range object + * + * @param {Array.} range - An array consisting of a single date or a pair of dates + * @returns {object} + */ +const esRangeValue = (range) => { + // the greater-than-equal value will always be the first value in the range array. 
+ // depending on the number of values and their equality, we query using less-than-equal + // the second value, or just less-than the first value plus one + + // Treat case where range start equals range end same as case of single value: + if (range[0] === range[1]) range = range.slice(0, 1) + const rangeQuery = { + gte: range[0] + } + if (range.length === 2) { + // search on both range values + rangeQuery.lte = range[range.length - 1] + } else if (range.length === 1) { + // if there is just one range, query up until the next year + rangeQuery.lt = range[0] + 1 + } + return rangeQuery +} + +// Configure controller-wide parameter parsing: +const parseSearchParams = function (params, overrideParams = {}) { + return parseParams(params, { + q: { type: 'string' }, + page: { type: 'int', default: 1 }, + per_page: { type: 'int', default: 50, range: [0, 100] }, + field: { type: 'string', range: Object.keys(AGGREGATIONS_SPEC) }, + sort: { type: 'string', range: Object.keys(SORT_FIELDS), default: 'relevance' }, + sort_direction: { type: 'string', range: ['asc', 'desc'] }, + search_scope: { type: 'string', range: Object.keys(SEARCH_SCOPES), default: 'all' }, + filters: { type: 'hash', fields: FILTER_CONFIG }, + items_size: { type: 'int', default: 100, range: [0, 200] }, + items_from: { type: 'int', default: 0 }, + callnumber: { type: 'string' }, + standard_number: { type: 'string' }, + contributor: { type: 'string' }, + title: { type: 'string' }, + subject: { type: 'string' }, + subject_prefix: { type: 'string' }, + isbn: { type: 'string' }, + issn: { type: 'string' }, + lccn: { type: 'string' }, + oclc: { type: 'string' }, + role: { type: 'string' }, + merge_checkin_card_items: { type: 'boolean', default: true }, + include_item_aggregations: { type: 'boolean', default: true }, + ids: { type: 'string_list' }, + ...overrideParams + }) +} + +const nyplSourceAndId = async function (uri) { + const nyplSourceMapper = await NyplSourceMapper.instance() + const { id, nyplSource } = 
nyplSourceMapper.splitIdentifier(uri) ?? {} + if (!id || !nyplSource) { + throw new errors.InvalidParameterError(`Invalid bnum: ${uri}`) + } + return { id, nyplSource } +} + +function itemsByFilter (identifierValues, app) { + const filter = { terms: { 'items.identifier': identifierValues } } + let opts = { _source: ['uri', 'type', 'items.uri', 'items.type', 'items.identifier', 'items.holdingLocation', 'items.status', 'items.catalogItemType', 'items.accessMessage', 'items.m2CustomerCode'] } + + opts = Object.assign({ + _source: null + }, opts) + + // Build ES query body: + const body = { + query: { + nested: { + path: 'items', + score_mode: 'avg', + query: { + constant_score: { + filter + } + } + } + } + } + if (opts._source) body._source = opts._source + + app.logger.debug('Resources#itemsByFilter', body) + return app.esClient.search(body) + .then((resp) => { + if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items')) + resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels() + // Convert this ES bibs response into an array of flattened items: + return resp.hits.hits + .map((doc) => doc._source) + // Reduce to a flat array of items + .reduce((a, bib) => { + return a.concat(bib.items) + // Let's affix that bnum into the item's identifiers so we know where it came from: + .map((i) => { + return Object.assign(i, { identifier: [`urn:bnum:${bib.uri}`].concat(i.identifier) }) + }) + }, []) + }).then((items) => { + return items.filter((item) => { + return item.identifier.filter((i) => identifierValues.indexOf(i) >= 0).length > 0 + }) + }) +} + +/** +* Given an array of ES aggregations responses (such as that returned from msearch) +**/ +const mergeAggregationsResponses = (responses) => { + // Filter out errored responses: + responses = responses.filter((resp) => resp.aggregations) + if (responses.length === 0) { + return {} + } + return { + // Use `hits` of last element, somewhat arbitrarily: + hits: 
responses[responses.length - 1].hits, + aggregations: responses + .reduce((allAggs, resp) => { + const respAggs = Object.entries(resp.aggregations) + // Build hash of response aggs, squashing _nested aggs: + .reduce((a, [field, _a]) => { + // If it's nested, it will be in our special '_nested' prop: + a[field] = _a._nested || _a + return a + }, {}) + // Add response aggs to combined aggs: + return Object.assign(allAggs, respAggs) + }, {}) + } +} + +// Create promise to resolve deliveryLocationTypes by patron type: +const lookupPatronType = async function (params) { + try { + await AvailableDeliveryLocationTypes.getScholarRoomByPatronId(params.patronId) + } catch (e) { + throw new errors.InvalidParameterError('Invalid patronId') + } +} + +const makeRelevanceReport = (params) => (r, ind) => { + const out = [] + out.push(`${ind + 1}: ${r.searchResultScore} score > ${r.result.uri}:`) + if (params.search_scope === 'contributor') out.push(`(${r.result.creatorLiteral || r.result.contributorLiteral})`) + if (['standard_number', 'callnumber'].includes(params.search_scope)) out.push(`(${r.result.items && r.result.items[0]?.shelfMark})`) + out.push(`${r.result.title} (displayed as "${r.result.titleDisplay}")`) + if (r.matchedQueries) out.push(`\n ${r.matchedQueries.join(', ')}`) + return out.join(' ') +} + +module.exports = { + esRangeValue, + parseSearchParams, + nyplSourceAndId, + itemsByFilter, + mergeAggregationsResponses, + lookupPatronType, + makeRelevanceReport +} diff --git a/package-lock.json b/package-lock.json index e142a8e2..d253d9a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" @@ -6194,6 +6195,15 @@ "integrity": 
"sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "devOptional": true }, + "node_modules/ebnf": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ebnf/-/ebnf-1.9.1.tgz", + "integrity": "sha512-uW2UKSsuty9ANJ3YByIQE4ANkD8nqUPO7r6Fwcc1ADKPe9FRdcPpMl3VEput4JSvKBJ4J86npIC2MLP0pYkCuw==", + "license": "MIT", + "bin": { + "ebnf": "dist/bin.js" + } + }, "node_modules/ecc-jsbn": { "version": "0.1.2", "dev": true, diff --git a/package.json b/package.json index 3383b465..8443ec2b 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" @@ -33,7 +34,6 @@ }, "scripts": { "test": "./node_modules/.bin/standard --env mocha && NODE_ENV=test ./node_modules/.bin/mocha test --exit", - "test-integration": "./node_modules/.bin/mocha test/integration", "start": "node server.js", "deploy-development": "git checkout development && git pull origin development && eb deploy discovery-api-dev --profile nypl-sandbox", "deploy-qa": "git checkout qa && git pull origin qa && eb deploy discovery-api-qa --profile nypl-digital-dev", diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js new file mode 100644 index 00000000..24d5d5d6 --- /dev/null +++ b/test/cql_grammar.test.js @@ -0,0 +1,110 @@ +const { expect } = require('chai') + +const { simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') + +function validateAtomicQuery (parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal('query') + expect(parsed.children.length).to.equal(1) + const subQuery = parsed.children[0] + expect(subQuery.type).to.equal('sub_query') + expect(subQuery.children.length).to.equal(1) + const atomicQuery = subQuery.children[0] + 
expect(atomicQuery.type).to.equal('atomic_query') + const scopeNode = atomicQuery.children.find(child => child.type === 'scope') + const scopeTerm = scopeNode.children.find(child => child.type === 'scope_term') + expect(scopeTerm.text).to.equal(scope) + const relationNode = atomicQuery.children.find(child => child.type === 'relation') + const relationTerm = relationNode.children.find(child => child.type === 'relation_term') + expect(relationTerm.text).to.equal(relation) + const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term') + expect(quotedTermNode.text).to.equal(quotedTerm) +} + +describe('CQL Grammar', function () { + describe('parsing queries', function () { + it('parses atomic queries', function () { + validateAtomicQuery(parseWithRightCql('title="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('keyword any "hamlet shakespeare"'), 'keyword', 'any', '"hamlet shakespeare"') + validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"') + }) + + it('allows whitespace variants', function () { + validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + 
validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + }) + + it('correctly escapes escape characters', function () { + validateAtomicQuery(parseWithRightCql('keyword="Notes on \\"The Underground\\""'), 'keyword', '=', '"Notes on \\"The Underground\\""') + validateAtomicQuery(parseWithRightCql('title="This title ends in a slash \\\\"'), 'title', '=', '"This title ends in a slash \\\\"') + }) + + it('identifies words correctly', function () { + const parsed = parseWithRightCql('keyword adj "A multiword keyword"') + const words = [] + let nodes = [parsed] + while (nodes.length) { + const node = nodes.shift() + if (node.type === 'word') { + words.push(node.text) + } else { + nodes = nodes.concat(node.children) + } + } + const expectedWords = ['A', 'multiword', 'keyword'] + words.forEach(word => { + expect(expectedWords).to.include(word) + }) + expect(words.length).to.equal(3) + }) + + it('parses boolean queries', function () { + expect(simplify(parseWithRightCql( + 'title="dogs" AND keyword="cats"' + ))).to.deep.equal( + [['title', '=', ['dogs']], 'AND', ['keyword', '=', ['cats']]] + ) + + expect(simplify(parseWithRightCql( + 'title="dogs" AND keyword="cats" OR author adj "Bird"' + ))).to.deep.equal( + [ + [ + [ + 'title', '=', ['dogs'] + ], + 'AND', + [ + 'keyword', '=', ['cats'] + ] + ], + 'OR', + [ + 'author', 'adj', ['Bird'] + ] + ] + ) + }) + + it('parses queries with parentheses', function () { + expect(simplify(parseWithRightCql( + 'title="dogs" AND (keyword="cats" OR author adj "Bird")' + ))) + .to.deep.equal( + [ + ['title', '=', ['dogs']], 'AND', [['keyword', '=', ['cats']], + 'OR', + ['author', 'adj', ['Bird']] + ] + ] + ) + }) + }) +}) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js new file mode 100644 index 00000000..cf856b5c --- /dev/null +++ b/test/cql_query_builder.test.js @@ -0,0 +1,199 @@ +const { expect } = require('chai') + +const { buildEsQuery } = 
require('../lib/elasticsearch/cql_query_builder') +const ApiRequest = require('../lib/api-request') +const { + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses, + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery, + multiAdjQuery, + exactMatchQuery +} = require('./fixtures/cql_fixtures') + +describe('CQL Query Builder', function () { + it('Simple = query', function () { + expect(buildEsQuery('title="Hamlet"')) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple adj query', function () { + expect(buildEsQuery('title adj "Hamlet"')) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Multi-word adj query', function () { + expect(buildEsQuery('title adj "Hamlet, Prince"')) + .to.deep.equal( + multiAdjQuery + ) + }) + + it('Simple any query', function () { + expect(buildEsQuery('title any "Hamlet Othello"')) + .to.deep.equal( + simpleAnyQuery + ) + }) + + it('Simple all query', function () { + expect(buildEsQuery('title all "Hamlet Othello"')) + .to.deep.equal( + simpleAllQuery + ) + }) + + it('Prefix phrase query', function () { + expect(buildEsQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"')) + .to.deep.equal( + prefixPhraseQuery + ) + }) + + it('Prefix queries mixed into any query', function () { + expect(buildEsQuery('title any "^Tragedy ^Comedy Hamlet Othello"')) + .to.deep.equal( + anyWithPrefixQuery + ) + }) + + it('Keyword query for barcode', function () { + expect(buildEsQuery('keyword = "123456"')) + .to.deep.equal( + keywordQueryForBarcode + ) + }) + + it('Keyword query for shelfMark', function () { + expect(buildEsQuery('keyword = "B 12"')) + .to.deep.equal( + keywordQueryForShelfMark + ) + }) + + it('Keyword query for general 
term', function () { + expect(buildEsQuery('keyword = "Hamlet"')) + .to.deep.equal( + keywordQueryForGeneralTerm + ) + }) + + it('Identifier query', function () { + expect(buildEsQuery('identifier = "b1234"')) + .to.deep.equal( + identifierQuery + ) + }) + + it('Binary boolean query', function () { + expect(buildEsQuery('author = "Shakespeare" AND language = "English"')) + .to.deep.equal( + binaryBooleanQuery + ) + }) + + it('Ternary boolean query', function () { + expect(buildEsQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"')) + .to.deep.equal( + ternaryBooleanQuery + ) + }) + + it('Boolean query with parentheses', function () { + expect(buildEsQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")')) + .to.deep.equal( + queryWithParentheses + ) + }) + + it('Query with NOT', function () { + expect(buildEsQuery('author = "Shakespeare" NOT language = "English"')) + .to.deep.equal( + negationQuery + ) + }) + + it('Query with AND NOT', function () { + expect(buildEsQuery('author = "Shakespeare" AND NOT language = "English"')) + .to.deep.equal( + negationQuery + ) + }) + + it('Date after query', function () { + expect(buildEsQuery('date > "1990"')) + .to.deep.equal( + dateAfterQuery + ) + }) + + it('Date after or on query', function () { + expect(buildEsQuery('date >= "1990"')) + .to.deep.equal( + dateAfterOrOnQuery + ) + }) + + it('Date before query', function () { + expect(buildEsQuery('date < "1990"')) + .to.deep.equal( + dateBeforeQuery + ) + }) + + it('Date dateBeforeOrOnQuery query', function () { + expect(buildEsQuery('date <= "1990"')) + .to.deep.equal( + dateBeforeOrOnQuery + ) + }) + + it('Date within query', function () { + expect(buildEsQuery('date within "1990 2000"')) + .to.deep.equal( + dateWithinQuery + ) + }) + + it('Date encloses query', function () { + expect(buildEsQuery('date encloses "1990 2000"')) + .to.deep.equal( + dateEnclosesQuery + ) + }) + + it('Query with applied filters', function () { + 
const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 'cql' }) + expect(buildEsQuery('author="Shakespeare"', apiRequest)) + .to.deep.equal( + filterQuery + ) + }) + + it('Exact match query', function () { + expect(buildEsQuery('author == "William Shakespeare"')) + .to.deep.equal( + exactMatchQuery + ) + }) +}) diff --git a/test/elastic-body-builder.test.js b/test/elastic-body-builder.test.js new file mode 100644 index 00000000..ccec82dd --- /dev/null +++ b/test/elastic-body-builder.test.js @@ -0,0 +1,376 @@ +const { expect } = require('chai') + +const { bodyForSearch, bodyForFindByUri } = require('../lib/elasticsearch/elastic-body-builder') + +describe('bodyForSearch', function () { + it('excludes checkin cards by default', function () { + expect(bodyForSearch({ sort: 'relevance' })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must_not: [ + { + term: { + 'items.type': 'nypl:CheckinCardItem' + } + } + ] + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: [ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) + + it('includes checkin cards when present in params', function () { + expect(bodyForSearch({ sort: 'relevance', merge_checkin_card_items: true })) + .to.deep.equal( + { + query: { + bool: { + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: '3', + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + sort: 
[ + { + _score: 'desc' + }, + { + uri: 'asc' + } + ], + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + } + } + ) + }) +}) + +describe('bodyForFindByUri', function () { + it('queries for uri', function () { + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', + '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 100, + from: 0, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + const params = { + all_items: false, + uri: 'b15781267', + items_size: 100, + items_from: 0, + merge_checkin_card_items: true, + include_item_aggregations: true + } + const barcodes = {} + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) + + it('accepts item params', function () { + const barcodes = { 'Not Available': ['1234'] } + const params = { + all_items: false, + uri: 'b15781267', + items_size: 10, + items_from: 10, + merge_checkin_card_items: true, + include_item_aggregations: true + } + + const expected = { + _source: { + excludes: [ + 'uris', + '*_packed', 
+ '*_sort', + 'items.*_packed', + 'contentsTitle', + 'suppressed', + '*WithoutDates', + '*Normalized', + 'items' + ] + }, + size: 1, + query: { + bool: { + must: [ + { + term: { + uri: 'b15781267' + } + } + ], + filter: [ + { + bool: { + should: [ + { + nested: { + path: 'items', + query: { + bool: { + must: { + match_all: {} + } + } + }, + inner_hits: { + sort: [ + { + 'items.enumerationChronology_sort': 'desc' + } + ], + size: 10, + from: 10, + name: 'items' + } + } + }, + { + match_all: {} + } + ] + } + } + ] + } + }, + aggregations: { + item_location: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.holdingLocation_packed' + } + } + } + }, + item_status: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.status_packed' + } + } + } + }, + item_format: { + nested: { + path: 'items' + }, + aggs: { + _nested: { + terms: { + size: 100, + field: 'items.formatLiteral' + } + } + } + } + } + } + + expect(bodyForFindByUri(barcodes, params)) + .to.deep.equal(expected) + }) +}) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js new file mode 100644 index 00000000..552333c0 --- /dev/null +++ b/test/fixtures/cql_fixtures.js @@ -0,0 +1,1180 @@ +const simpleAdjQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const multiAdjQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + 
multi_match: { + query: 'Hamlet, Prince', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const prefixPhraseQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'seriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'titleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'donor.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelTitleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelSeriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelCreatorLiteral.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'uniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelUniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'addedAuthorTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + placeOfPublication: 'The Tragedy of Hamlet, Prince of Denmark' + } + } + ] + } + } + ] + } + } + ] + } +} + +const simpleAnyQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 
'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const anyWithPrefixQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Tragedy' + } + }, + { + prefix: { 'seriesStatement.raw': 'Tragedy' } + }, + { prefix: { 'titleAlt.raw': 'Tragedy' } }, + { prefix: { 'donor.raw': 'Tragedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Tragedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Tragedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Tragedy' } + }, + { prefix: { 'uniformTitle.raw': 'Tragedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Tragedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Tragedy' } + }, + { prefix: { placeOfPublication: 'Tragedy' } } + ] + } + }, + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Comedy' + } + }, + { prefix: { 'seriesStatement.raw': 'Comedy' } }, + { prefix: { 'titleAlt.raw': 'Comedy' } }, + { prefix: { 'donor.raw': 'Comedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Comedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Comedy' } + }, + { + prefix: { 
'parallelCreatorLiteral.raw': 'Comedy' } + }, + { prefix: { 'uniformTitle.raw': 'Comedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Comedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Comedy' } + }, + { prefix: { placeOfPublication: 'Comedy' } } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const simpleAllQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + 
{ + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const keywordQueryForBarcode = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [{ term: { 'items.idBarcode': '123456' } }] + } + } + } + } + ] + } + } + ] + } +} + +const keywordQueryForShelfMark = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 
'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: ['items.shelfMark'], + type: 'phrase' + } + } + ] + } + } + } + } + ] + } + } + ] + } +} + +const keywordQueryForGeneralTerm = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const identifierQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { uri: 'b1234' } }, + { term: { 'idIsbn.clean': 'b1234' } }, + { term: { 'idIssn.clean': 'b1234' } }, + { prefix: { 'identifierV2.value': 'b1234' } } + 
] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { term: { 'items.idBarcode': 'b1234' } }, + { + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' + } + } + ] + } + } + } + } + ] + } + } + ] + } +} + +const binaryBooleanQuery = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const ternaryBooleanQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: ['genreForm'], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const queryWithParentheses = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + 
bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: ['genreForm'], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const negationQuery = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ], + must_not: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const dateAfterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateBeforeQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateBeforeOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateAfterOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateWithinQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } + } + } + } + ] + } + } + ] + } +} + +const 
dateEnclosesQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } + } + } + } + ] + } + } + ] + } +} + +const filterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ], + filter: [ + { + bool: { + should: [ + { term: { 'language.id': 'Klingon' } }, + { term: { 'language.label': 'Klingon' } } + ] + } + } + ] + } +} + +const exactMatchQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + term: { + 'creatorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'contributorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'parallelCreatorLiteral.raw': 'William Shakespeare' + } + }, + { + term: { + 'parallelContributorLiteral.raw': 'William Shakespeare' + } + } + ] + } + } + ] + } + } + ] + } +} + +module.exports = { + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses, + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery, + multiAdjQuery, + exactMatchQuery +} diff --git a/test/resources.test.js b/test/resources.test.js index 533866b2..d938e263 100644 --- a/test/resources.test.js +++ b/test/resources.test.js @@ -5,6 +5,12 @@ const scsbClient = require('../lib/scsb-client') const errors = require('../lib/errors') const { AGGREGATIONS_SPEC } = require('../lib/elasticsearch/config') const 
numAggregations = Object.keys(AGGREGATIONS_SPEC).length +const { + itemsFilterContext, + itemsQueryContext, + buildElasticQuery, + buildElasticBody +} = require('../lib/elasticsearch/elastic-body-builder') const fixtures = require('./fixtures') @@ -72,7 +78,7 @@ describe('Resources query', function () { describe('buildElasticQuery', function () { it('uses "query string query" if subjectLiteral: used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -83,7 +89,7 @@ describe('Resources query', function () { it('uses "query string query" if subjectLiteral: quoted phrase used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'subjectLiteral:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -94,7 +100,7 @@ describe('Resources query', function () { it('escapes colon if field not recognized', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'fladeedle:"hot potatoes"' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') expect(body.bool.must).to.be.a('array') @@ -105,7 +111,7 @@ describe('Resources query', function () { it('uses "query string query" if plain keyword query used', function () { const params = resourcesPrivMethods.parseSearchParams({ q: 'potatoes' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.be.a('object') expect(body.bool).to.be.a('object') 
expect(body.bool.must).to.be.a('array') @@ -116,7 +122,7 @@ describe('Resources query', function () { it('accepts advanced search parameters', function () { const params = resourcesPrivMethods.parseSearchParams({ contributor: 'Poe', title: 'Raven', subject: 'ravens' }) - const body = resourcesPrivMethods.buildElasticQuery(params) + const body = buildElasticQuery(params) expect(body).to.nested.include({ // Expect a title match on Raven: @@ -135,7 +141,7 @@ describe('Resources query', function () { describe('buildElasticBody', function () { it('uses subjectLiteral.raw when given a subjectLiteral filter', function () { const params = resourcesPrivMethods.parseSearchParams({ q: '', filters: { subjectLiteral: 'United States -- History' } }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query).to.be.a('object') expect(body.query.bool).to.be.a('object') @@ -152,7 +158,7 @@ describe('Resources query', function () { expect(process.env.HIDE_NYPL_SOURCE).to.be.a('undefined') const params = resourcesPrivMethods.parseSearchParams({ q: '' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.be.a('object') expect(body.query.filter).to.be.a('undefined') @@ -162,7 +168,7 @@ describe('Resources query', function () { process.env.HIDE_NYPL_SOURCE = 'recap-hl' const params = resourcesPrivMethods.parseSearchParams({ q: '' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) // Expect query to resemble: {"from":0,"size":50,"query":{"bool":{"filter":[{"bool":{"must_not":{"terms":{"nyplSource":["recap-hl"]}}}}]}},"sort":["uri"]} expect(body).to.be.a('object') @@ -174,26 +180,26 @@ describe('Resources query', function () { it('processes isbn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ isbn: '0689844921' }) - const body = 
resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested .include({ 'query.bool.must[0].term.idIsbn\\.clean': '0689844921' }) }) it('processes issn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ issn: '1234-5678' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idIssn\\.clean': '1234-5678' }) }) it('processes lccn correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ lccn: '00068799' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].regexp.idLccn.value': '[^\\d]*00068799[^\\d]*' }) }) it('processes oclc correctly', () => { const params = resourcesPrivMethods.parseSearchParams({ oclc: '1033548057' }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body).to.nested.include({ 'query.bool.must[0].term.idOclc': '1033548057' }) }) @@ -206,9 +212,9 @@ describe('Resources query', function () { }) const paramsSnapshot = JSON.stringify(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) - resourcesPrivMethods.buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) + buildElasticBody(params) expect(JSON.stringify(params)).to.equal(paramsSnapshot) }) @@ -221,7 +227,7 @@ describe('Resources query', function () { } }) - const body = resourcesPrivMethods.buildElasticBody(params) + const body = buildElasticBody(params) expect(body.query.bool.filter[0].terms['subjectLiteral.raw']).to.deep.equal(['S1', 'S1.']) @@ -481,7 +487,8 @@ describe('Resources query', function () { size: 1, query: { bool: { - must: [{ term: { uri: 'b1234' } }] + must: [{ term: { uri: 'b1234' } }], + filter: [] } }, aggregations: { @@ -662,40 +669,40 @@ 
describe('Resources query', function () { describe('itemsFilterContext', () => { it('should return an empty object in case of no query', () => { - expect(resourcesPrivMethods.itemsFilterContext({})).to.deep.equal({}) + expect(itemsFilterContext({})).to.deep.equal({}) }) it('should return an empty object in case there are no filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: {} })).to.deep.equal({}) + expect(itemsFilterContext({ query: {} })).to.deep.equal({}) }) it('should return filters for volume in case there is a volume', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { volume: [1, 2] } })) + expect(itemsFilterContext({ query: { volume: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.volumeRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for date in case there is a date', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { date: [1, 2] } })) + expect(itemsFilterContext({ query: { date: [1, 2] } })) .to.deep.equal({ filter: [{ range: { 'items.dateRange': { gte: 1, lte: 2 } } }] }) }) it('should return filters for format in case there is a format', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) + expect(itemsFilterContext({ query: { format: ['text', 'microfilm', 'AV'] } })) .to.deep.equal({ filter: [{ terms: { 'items.formatLiteral': ['text', 'microfilm', 'AV'] } }] }) }) it('should return filters for location in case there is a location', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] } })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) it('should return filters for status in case there is a status', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { status: ['Available', 'Unavailable', 'In Process'] } 
})) + expect(itemsFilterContext({ query: { status: ['Available', 'Unavailable', 'In Process'] } })) .to.deep.equal({ filter: [{ terms: { 'items.status.id': ['Available', 'Unavailable', 'In Process'] } }] }) }) it('should combine all filters in case of multiple filters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ + expect(itemsFilterContext({ query: { volume: [1, 2], date: [3, 4], @@ -715,160 +722,28 @@ describe('Resources query', function () { }) it('should ignore all other parameters', () => { - expect(resourcesPrivMethods.itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) + expect(itemsFilterContext({ query: { location: ['SASB', 'LPA', 'Schomburg'] }, something: 'else' })) .to.deep.equal({ filter: [{ terms: { 'items.holdingLocation.id': ['SASB', 'LPA', 'Schomburg'] } }] }) }) }) describe('itemsQueryContext', () => { it('should exclude check in card items when options.merge_checkin_card_items is not set', () => { - expect(resourcesPrivMethods.itemsQueryContext({})) + expect(itemsQueryContext({})) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should exclude check in card items when merge_checkin_card_items is falsey', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: false })) + expect(itemsQueryContext({ merge_checkin_card_items: false })) .to.deep.equal({ must_not: [{ term: { 'items.type': 'nypl:CheckinCardItem' } }] }) }) it('should use match_all for items when merge_checkin_card_items is truthy', () => { - expect(resourcesPrivMethods.itemsQueryContext({ merge_checkin_card_items: true })) + expect(itemsQueryContext({ merge_checkin_card_items: true })) .to.deep.equal({ must: { match_all: {} } }) }) }) - describe('addInnerHits', () => { - it('should include query for items', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2 })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - 
bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - } - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should exclude check in card items if explicitly set', () => { - expect(resourcesPrivMethods.addInnerHits({ query: { bool: {} } }, { size: 1, from: 2, merge_checkin_card_items: false })) - .to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must_not: [ - { term: { 'items.type': 'nypl:CheckinCardItem' } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} } - ] - } - } - ] - } - } - }) - }) - - it('should include filters for items', () => { - expect(resourcesPrivMethods.addInnerHits( - { query: { bool: {} } }, - { size: 1, from: 2, query: { volume: [1, 2], location: ['SASB', 'LPA'], other: 'filter' } } - )).to.deep.equal({ - query: { - bool: { - filter: [ - { - bool: { - should: [ - { - nested: { - path: 'items', - query: { - bool: { - must: { - match_all: {} - }, - filter: [ - { range: { 'items.volumeRange': { gte: 1, lte: 2 } } }, - { terms: { 'items.holdingLocation.id': ['SASB', 'LPA'] } } - ] - } - }, - inner_hits: { - sort: [{ 'items.enumerationChronology_sort': 'desc' }], - size: 1, - from: 2, - name: 'items' - } - } - }, - { match_all: {} }, - { - nested: { - inner_hits: { name: 'allItems' }, - path: 'items', - query: { - bool: { - must_not: [ - { exists: { field: 'items.electronicLocator' } } - ] - } - } - } - } - ] - } - } - ] - } - } - }) - }) - }) - describe('search exception handling', () => { describe('lexical error', () => { before(() => {