diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 00000000..f9d5f765 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,30 @@ +name: integration-test + +on: + pull_request: + branches: + - production + +jobs: + integration-test: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole + aws-region: us-east-1 + - uses: actions/checkout@v3 + - name: Set Node version + uses: actions/setup-node@v3 + with: + node-version-file: '.nvmrc' + - name: npm install + run: npm i + - name: Run integration tests + env: + ENV: 'qa' + run: node test/integration/delivery-locations-by-barcode.test.js diff --git a/.github/workflows/test-and-deploy.yml b/.github/workflows/test-and-deploy.yml index c9306ac6..a179ba4b 100644 --- a/.github/workflows/test-and-deploy.yml +++ b/.github/workflows/test-and-deploy.yml @@ -2,6 +2,11 @@ name: Unit Tests on: [push] +# Global environment variable based on the branch name +env: + ENV_TAG: ${{ github.ref_name }} + ECR_REPOSITORY: discovery-api + jobs: tests: runs-on: ubuntu-latest @@ -15,114 +20,15 @@ jobs: run: npm ci - name: Unit Tests run: npm test - integration-test-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: tests - if: github.ref == 'refs/heads/qa' - steps: - - uses: actions/checkout@v4 - - name: Set Node version - uses: actions/setup-node@v4 - with: - node-version-file: '.nvmrc' - - name: Install dependencies - run: npm ci - - name: Start service - run: ENV=qa npm start & - - name: Run tests - run: npm run test-integration - deploy-qa: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa' - steps: - - name: Checkout repo - uses: 
actions/checkout@v3 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 
- docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa-latest - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa --service discovery-api-qa --force-new-deployment - deploy-qa2: - permissions: - id-token: write - contents: read - runs-on: ubuntu-latest - needs: - - tests - if: github.ref == 'refs/heads/qa2' - steps: - - name: Checkout repo - uses: actions/checkout@v3 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v2 - with: - role-to-assume: arn:aws:iam::946183545209:role/GithubActionsDeployerRole - aws-region: us-east-1 - - - name: Log in to ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api - run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="qa2-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "qa2-previous" --image-manifest "$MANIFEST"; fi - - name: Build, tag, and push image to Amazon ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api - IMAGE_TAG: ${{ github.sha }} - run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 
- docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:qa2-latest - - - name: Force ECS Update - run: | - aws ecs update-service --cluster discovery-api-qa2 --service discovery-api-qa2 --force-new-deployment - deploy-production: + deploy: permissions: id-token: write contents: read runs-on: ubuntu-latest needs: tests - if: github.ref == 'refs/heads/production' + # Only run if it's one of our three deployment branches + if: contains(fromJSON('["qa", "qa2", "production"]'), github.ref_name) steps: - name: Checkout repo uses: actions/checkout@v3 @@ -138,22 +44,23 @@ jobs: uses: aws-actions/amazon-ecr-login@v1 - name: Back up previous image for rollback - env: - ECR_REPOSITORY: discovery-api run: | - MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') - PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name $ECR_REPOSITORY --image-ids imageTag="production-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') - if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then aws ecr put-image --repository-name $ECR_REPOSITORY --image-tag "production-previous" --image-manifest "$MANIFEST"; fi + MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-latest" --output json | jq --raw-output --join-output '.images[0].imageManifest') + PREVIOUS_MANIFEST=$(aws ecr batch-get-image --repository-name ${{ env.ECR_REPOSITORY }} --image-ids imageTag="${{ env.ENV_TAG }}-previous" --output json | jq --raw-output --join-output '.images[0].imageManifest') + if [ "$MANIFEST" != "$PREVIOUS_MANIFEST" ]; then + aws ecr put-image --repository-name ${{ env.ECR_REPOSITORY }} --image-tag "${{ env.ENV_TAG }}-previous" --image-manifest 
"$MANIFEST" + fi + - name: Build, tag, and push image to Amazon ECR env: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: discovery-api IMAGE_TAG: ${{ github.sha }} run: | - docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG - docker tag $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG $ECR_REGISTRY/$ECR_REPOSITORY:production-latest - docker push $ECR_REGISTRY/$ECR_REPOSITORY:production-latest + docker build -t $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG . + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG + docker tag $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:$IMAGE_TAG $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + docker push $ECR_REGISTRY/${{ env.ECR_REPOSITORY }}:${{ env.ENV_TAG }}-latest + - name: Force ECS Update run: | - aws ecs update-service --cluster discovery-api-production --service discovery-api-production --force-new-deployment + aws ecs update-service --cluster discovery-api-${{ env.ENV_TAG }} --service discovery-api-${{ env.ENV_TAG }} --force-new-deployment \ No newline at end of file diff --git a/config/production.env b/config/production.env index 925f463f..6811e45f 100644 --- a/config/production.env +++ b/config/production.env @@ -10,7 +10,7 @@ NYPL_OAUTH_URL=https://isso.nypl.org/ ENCRYPTED_NYPL_OAUTH_ID=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGswaQYJKoZIhvcNAQcGoFwwWgIBADBVBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMLKVUQA58B6vprNcAIBEIAoaz0lI9EL2M9NyTuEwT8JDmPBt6aXfMiFs027DEuwsCN0wS0qWeFL1g== ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAIcwgYQGCSqGSIb3DQEHBqB3MHUCAQAwcAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAyWz91LOP2YP5fg0q0CARCAQ9inO9SV1M8R0Pkkx84r7UdwlU1FxfXvIjk/z6Qs81KBAVELhby2iD5LawQyDrR9tjhuMbotS6QnydwwMR/p8+qJXHI= -NYPL_CORE_VERSION=v2.37 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=info FEATURES=on-site-edd diff --git a/config/qa.env b/config/qa.env index a1b87a36..05a98812 100644 --- 
a/config/qa.env +++ b/config/qa.env @@ -12,7 +12,7 @@ NYPL_OAUTH_URL=https://isso.nypl.org/ ENCRYPTED_NYPL_OAUTH_ID=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGswaQYJKoZIhvcNAQcGoFwwWgIBADBVBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMLKVUQA58B6vprNcAIBEIAoaz0lI9EL2M9NyTuEwT8JDmPBt6aXfMiFs027DEuwsCN0wS0qWeFL1g== ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAIcwgYQGCSqGSIb3DQEHBqB3MHUCAQAwcAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAyWz91LOP2YP5fg0q0CARCAQ9inO9SV1M8R0Pkkx84r7UdwlU1FxfXvIjk/z6Qs81KBAVELhby2iD5LawQyDrR9tjhuMbotS6QnydwwMR/p8+qJXHI= -NYPL_CORE_VERSION=v2.37 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=debug FEATURES=on-site-edd diff --git a/config/test.env b/config/test.env index 3883094a..9267f131 100644 --- a/config/test.env +++ b/config/test.env @@ -10,7 +10,7 @@ NYPL_OAUTH_URL=http://oauth.example.com NYPL_OAUTH_ID=encrypted-nypl-oauth-id NYPL_OAUTH_SECRET=encrypted-nypl-oauth-id -NYPL_CORE_VERSION=v2.37 +NYPL_CORE_VERSION=v2.39 LOG_LEVEL=error FEATURES=on-site-edd diff --git a/lib/contributors.js b/lib/contributors.js index 0e7321de..4ccf874c 100644 --- a/lib/contributors.js +++ b/lib/contributors.js @@ -100,21 +100,21 @@ module.exports = function (app, _private = null) { } /** - * Builds an aggregation query that checks the resource index for counts on the contributorRoleLiteral field for a list of contributors. + * Builds an aggregation query that checks the resource index for counts on the browseableContributorRole_packed field for a list of contributors. 
*/ const buildElasticRoleCountQuery = function (contributorList) { return { size: 0, query: { terms: { - contributorRoleLiteral: contributorList + browseableContributorRole_packed: contributorList } }, aggs: { contributor_role: { terms: { script: { - source: 'def results = []; for (val in doc["contributorRoleLiteral"]) { int pos = val.indexOf("||"); if (pos != -1) { String name = val.substring(0, pos); if (params.targets.contains(name)) { results.add(val); } } } return results;', + source: 'def results = []; for (val in doc["browseableContributorRole_packed"]) { int pos = val.indexOf("||"); if (pos != -1) { String name = val.substring(0, pos); if (params.targets.contains(name)) { results.add(val); } } } return results;', params: { targets: contributorList } diff --git a/lib/display-field-unpacker.js b/lib/display-field-unpacker.js new file mode 100644 index 00000000..5ae5b26e --- /dev/null +++ b/lib/display-field-unpacker.js @@ -0,0 +1,25 @@ +const parseValueAndLabel = (delimitedString) => { + if (!delimitedString.includes('||')) { + return { value: delimitedString, display: null } + } + const [value, display] = delimitedString.split('||') + return { value, display } +} + +module.exports = (elasticSearchResponse) => { + elasticSearchResponse.hits.hits.forEach((bib) => { + // Contributors and creators are packed like so: value||display, where + // display can have prefix, title, and roles. 
We'd like to unpack them in a friendly format for the frontend + // to display the full label and use the isolated name for link-building + Object.entries(bib._source).forEach(([key, value]) => { + if (key.endsWith('_displayPacked')) { + const fieldName = key.replace('_displayPacked', '') + bib._source[fieldName + 'Display'] = value.map((packedValue) => parseValueAndLabel(packedValue)) + delete bib._source[key] + } + }) + + return bib + }) + return elasticSearchResponse +} diff --git a/lib/elasticsearch/config.js b/lib/elasticsearch/config.js index a7ae053d..bb74f353 100644 --- a/lib/elasticsearch/config.js +++ b/lib/elasticsearch/config.js @@ -73,7 +73,8 @@ const SEARCH_SCOPES = { }, standard_number: { // We do custom field matching for this search-scope - } + }, + cql: {} // see cql/index_mapping for this search scope } const FILTER_CONFIG = { @@ -89,7 +90,7 @@ const FILTER_CONFIG = { mediaType: { operator: 'match', field: ['mediaType.id', 'mediaType.label'], repeatable: true }, carrierType: { operator: 'match', field: ['carrierType.id', 'carrierType.label'], repeatable: true }, publisher: { operator: 'match', field: ['publisherLiteral.raw'], repeatable: true }, - contributorLiteral: { operator: 'match', field: ['contributorLiteral.keywordLowercased', 'parallelContributor.raw', 'creatorLiteral.keywordLowercased', 'parallelCreatorLiteral.raw'], repeatable: true }, + contributorLiteral: { operator: 'match', field: ['contributorLiteral.keywordLowercased', 'parallelContributorLiteral.raw', 'creatorLiteral.keywordLowercased', 'parallelCreatorLiteral.raw'], repeatable: true }, creatorLiteral: { operator: 'match', field: ['creatorLiteral.raw', 'parallelCreatorLiteral.raw'], repeatable: true }, issuance: { operator: 'match', field: ['issuance.id', 'issuance.label'], repeatable: true }, createdYear: { operator: 'match', field: ['createdYear'], repeatable: true }, diff --git a/lib/elasticsearch/cql/index-mapping.js b/lib/elasticsearch/cql/index-mapping.js new file mode 
100644 index 00000000..0befbc87 --- /dev/null +++ b/lib/elasticsearch/cql/index-mapping.js @@ -0,0 +1,138 @@ +const indexMapping = { + keyword: { + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded', + // Try to detect shelfmark searches (e.g. JFD 16-5143) + { field: 'items.shelfMark', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + // missing description + 'subjectLiteral.raw', + 'creatorLiteral.keywordLowercased', + 'contributorLiteral.keywordLowercased', + // note.label is missing + 'publisherLiteral.raw', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'genreForm.raw', + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + // parallelPublisher/parallelPublisherLiteral missing + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication', + { field: 'items.shelfMark.raw', on: (q) => /^[A-Z]{1,3} \d{2,}/.test(q) } + ], + term: [ + { field: 'items.idBarcode', on: (q) => /\d{6,}/.test(q) } + ] + }, + title: { + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 
'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + exact_fields: [ + 'title.keywordLowercasedStripped', + 'seriesStatement.raw', + 'titleAlt.raw', + // titleDisplay missing + // contentsTitle missing + // tableOfContents missing + 'donor.raw', + // parallelTitle missing + // parallelTitleDisplay missing + 'parallelTitleAlt.raw', + 'parallelSeriesStatement.raw', + 'parallelCreatorLiteral.raw', + 'uniformTitle.raw', + 'parallelUniformTitle.raw', + // formerTitle missing + 'addedAuthorTitle.raw', + 'placeOfPublication' + ] + }, + author: { + fields: ['creatorLiteral', 'creatorLiteral.folded', 'contributorLiteral.folded', 'parallelCreatorLiteral.folded', 'parallelContributorLiteral.folded'], + exact_fields: [ + 'creatorLiteral.keywordLowercased', 'contributorLiteral.keywordLowercased', + 'parallelCreatorLiteral.raw', 'parallelContributorLiteral.raw' + ] + }, + callnumber: { + term: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased'] + }, + identifier: { + prefix: ['identifierV2.value', 'items.shelfMark.keywordLowercased'], + term: ['uri', 'items.idBarcode', 'idIsbn.clean', 'idIssn.clean'] + }, + subject: { + fields: ['subjectLiteral', 'subjectLiteral.folded', 'parallelSubjectLiteral.folded'], + exact_fields: ['subjectLiteral.raw'] + }, + language: { term: ['language.id', 'language.label'] }, + date: { fields: ['dates.range'] }, + series: { + term: ['series', 'parallelSeries'] + }, + genre: { fields: ['genreForm'], exact_fields: ['genreForm.raw'] }, + center: { term: ['buildingLocationIds'] }, + division: { term: ['collectionIds'] }, + format: { term: ['formatId'] } +} + +module.exports = { + indexMapping +} diff --git a/lib/elasticsearch/cql_grammar.js 
b/lib/elasticsearch/cql_grammar.js new file mode 100644 index 00000000..81f9aafc --- /dev/null +++ b/lib/elasticsearch/cql_grammar.js @@ -0,0 +1,130 @@ +const { Grammars } = require('ebnf') + +function reverseGrammar (grammar) { + return grammar.split('\n') + .map(line => + (line.split('::=') + .map(side => + (side.split('|') + .map(dis => + (dis.split(' ') + .map(word => + (word.includes('"') ? word.split('').reverse().join('') : word)) + .reverse().join(' ')) + ).join('|'))).join('::= '))).join('\n') +} + +const leftCql = ` + query ::= query whitespace connective whitespace sub_query | sub_query + connective ::= "AND NOT" | "AND" | "OR" | "NOT" + sub_query ::= atomic_query | "(" query ")" + atomic_query ::= scope relation quoted_term + scope ::= scope_term whitespace | scope_term + relation ::= relation_term whitespace | relation_term + scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format" + relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses" + quoted_term ::= quote phrase quote + phrase ::= phrase whitespace word | word + whitespace ::= [#x20#x09#x0A#x0D]+ + word ::= word escaped_char | word regular_char | escaped_char | regular_char + regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D] + escaped_char ::= slash char + slash ::= [#x5c] + char ::= [a-z]|[^a-z] + quote ::= [#x22] +` + +const rightCql = reverseGrammar(leftCql) + +function simplify (ast) { + switch (ast.type) { + case 'query': { + const children = ast.children.filter(child => child.type !== 'whitespace').map(child => simplify(child)) + return children.length > 1 ? 
children : children[0] + } + case 'connective': + return ast.text + case 'sub_query': + return simplify(ast.children.find(child => child.type.includes('query'))) + case 'atomic_query': + return ast.children.map(child => simplify(child)) + case 'scope': + return simplify(ast.children.find(child => child.type.includes('scope_term'))) + case 'relation': + return simplify(ast.children.find(child => child.type.includes('relation_term'))) + case 'scope_term': + return ast.text + case 'relation_term': + return ast.text + case 'quoted_term': + return simplify(ast.children.find(child => child.type.includes('phrase'))) + case 'phrase': { + const word = ast.children.find(child => child.type === 'word') + const phrase = ast.children.find(child => child.type === 'phrase') + return [simplify(word)].concat(phrase ? simplify(phrase) : []) + } + case 'word': + return ast.text + default: + break + } +} + +function reverseString (string) { + return string.split('').reverse().join('') +} + +function reverseAST (tree) { + if (!tree) return null + tree.text = reverseString(tree.text) + tree.children = tree.children.map(child => reverseAST(child)).reverse() + return tree +} + +const rightCqlParser = new Grammars.W3C.Parser(rightCql) + +// we want to associate operators to the left, but we have a right parser. 
+// so: reverse the grammar and the input string, then reverse the output +function parseRight (string, parser) { + return reverseAST(parser.getAST(reverseString(string))) +} +function parseWithRightCql (string) { + return parseRight(string, rightCqlParser) +} + +function parsedASTtoNestedArray (ast) { + if (!ast.type.includes('query')) { + return reverseString(ast.text) + } + + const childTypes = [ + 'atomic_query', 'sub_query', 'query', 'connective', + 'scope', 'relation', 'quoted_term' + ] + + const children = ast.children + .filter(child => childTypes.includes(child.type)) + .map(child => parsedASTtoNestedArray(child)) + + if (children.length === 1) { + return children[0] + } + + return children +} + +// we need to reverse the error message since `parseWithRightCql` doesn't +function displayParsed (string) { + const parsed = parseWithRightCql(string) + if (!parsed) return {} + if (parsed.errors.length) { + return { + error: parsed.errors.map(error => + `Parsing error likely near end of "${reverseString(error.token.rest)}"` + ).join('\n') + } + } + return { parsed: parsedASTtoNestedArray(parsed) } +} + +module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed } diff --git a/lib/elasticsearch/cql_query_builder.js b/lib/elasticsearch/cql_query_builder.js new file mode 100644 index 00000000..0f10f9bf --- /dev/null +++ b/lib/elasticsearch/cql_query_builder.js @@ -0,0 +1,346 @@ +const { parseWithRightCql } = require('./cql_grammar') +const { indexMapping } = require('./cql/index-mapping') +const ElasticQueryBuilder = require('./elastic-query-builder') + +function buildEsQuery (cqlQuery, request = null) { + const filterQuery = buildFilterQuery(request) + return { + bool: { + should: [ + buildEsQueryFromTree( + parseWithRightCql(cqlQuery.trim()) + ) + ], + ...filterQuery + } + } +} + +function buildFilterQuery (request) { + if (!request) return {} + const queryJson = 
ElasticQueryBuilder.forApiRequest(request).query.toJson() + if (queryJson.bool && queryJson.bool.filter) { + return { filter: queryJson.bool.filter } + } + return {} +} + +/** + this is mostly there but needs to handle exact strings + */ + +function buildEsQueryFromTree (tree) { + switch (tree.type) { + case 'query': { + const queries = tree.children.filter(child => child.type.includes('query')) + const connectives = tree.children.filter(child => child.type === 'connective') + if (connectives.length) { + return buildBoolean(connectives[0].text, queries) + } + return buildEsQueryFromTree(queries[0]) + } + case 'sub_query': { + const query = tree.children.filter(child => child.type.includes('query'))[0] + return buildEsQueryFromTree(query) + } + case 'atomic_query': { + return buildAtomic(atomicQueryParams(tree)) + } + default: + break + } +} + +function buildBoolean (operator, queries) { + if (['NOT', 'AND NOT'].includes(operator)) return buildNegation(queries) + const esOperator = operator === 'AND' ? 'must' : 'should' + return { + bool: { + [esOperator]: queries.map(query => buildEsQueryFromTree(query)) + } + } +} + +function buildNegation (queries) { + return { + bool: { + must: [buildEsQueryFromTree(queries[0])], + must_not: [buildEsQueryFromTree(queries[1])] + } + } +} + +/** + A convenience method that collects the scope, relation, the full query (i.e. the term), and + all the separate words in the query (i.e. the terms) + */ +function atomicQueryParams (atomicQuery) { + return { + scope: atomicQuery.children.find(child => child.type === 'scope').text.trim(), + relation: atomicQuery.children.find(child => child.type === 'relation').text.trim(), + term: findTopPhrase(atomicQuery), + terms: findTopWords(atomicQuery) + } +} + +/** + Find the highest (i.e. 
most inclusive) phrase node and return its text + Ex: if the query was keyword="Hamlet Shakespeare", there will be phrase nodes + for Hamlet Shakespeare, Hamlet, and Shakespeare, and this will return Hamlet Shakespeare + */ +function findTopPhrase (tree) { + if (tree.type === 'phrase') return tree.text + const topPhrases = tree.children.map(child => findTopPhrase(child)).filter(x => x) + return topPhrases.length ? topPhrases[0] : null +} + +/** + Return a list of all the words that aren't fragments of larger words + E.g. Hamlet Shakespeare => [Hamlet, Shakespeare], and doesn't include the text + of word nodes for H, Ha, Ham, etc... + */ +function findTopWords (tree) { + if (tree.type === 'word') return [tree.text] + return tree.children.map(child => findTopWords(child)).flat() +} + +/** + For an object where the values are arrays, apply the given filter and map + to each of the arrays. + */ +function nestedFilterAndMap (obj, filter, map) { + return Object.assign({}, + ...(Object.entries(obj) + .map(([k, v]) => ({ [k]: v.filter(filter).map(map) })) + ) + ) +} + +/** + Return truthy value if and only if one of the values is a non-empty array + */ +function hasFields (obj) { + return Object.values(obj).some(arr => arr.length) +} + +/** + build atomic: + - identify the scope fields that match the term + - separate out into main, items, holdings + - boolean(main, items, holdings) + - items/holds = nested(items/holdings, main) + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term 
fields + */ + +function buildAtomic ({ scope, relation, terms, term }) { + const allFields = nestedFilterAndMap( + indexMapping[scope], + field => typeof field === 'string' || field.on(term), + field => (typeof field === 'string' ? field : field.field) + ) + + const bibFields = nestedFilterAndMap( + allFields, + (field) => !['items', 'holdings'].some(prefix => field.startsWith(prefix)), + field => field + ) + + const itemFields = nestedFilterAndMap( + allFields, + (field) => field.startsWith('items'), + field => field + ) + + const holdingsFields = nestedFilterAndMap( + allFields, + (field) => field.startsWith('holdings'), + field => field + ) + + return { + bool: { + should: [ + buildAtomicMain({ fields: bibFields, relation, terms, term }), + (hasFields(itemFields) && buildAtomicNested('items', { fields: itemFields, relation, terms, term })), + (hasFields(holdingsFields) && buildAtomicNested('holdings', { fields: holdingsFields, relation, terms, term })) + ].filter(x => x) + } + } +} + +function buildAtomicNested (name, { fields, relation, terms, term }) { + return { + nested: { + path: name, + query: buildAtomicMain({ fields, relation, terms, term }) + } + } +} + +/** + - main: + - if operator is any/all, take all query terms not starting with ^ and put in multi-match with all regular fields + - if operator is any/all, take all query terms starting with ^ and put in prefix with all regular fields + - if operator is =/adj, and query term doesn't start with ^, take all query terms and put in phrase match with all regular fields + - if operator is =/adj and query term starts with ^, strip it out and use phrase_prefix + - put all terms in prefix match with prefix fields + - put all terms in term matches with term fields + */ +function buildAtomicMain ({ fields, relation, terms, term }) { + switch (relation) { + case 'any': + case 'all': + return anyAllQueries({ fields, relation, terms }) + case '=': + case '==': + case 'adj': + return adjEqQueries({ fields, relation, 
terms, term }) + case '>': + case '<': + case '<=': + case '>=': + case 'within': + case 'encloses': + return dateQueries({ fields, relation, terms, term }) + default: + break + } +} + +function anyAllQueries ({ fields, relation, terms }) { + if (!['any', 'all'].includes(relation)) { return null } + const operator = (relation === 'any' ? 'should' : 'must') + return { + bool: { + [operator]: terms.map(term => matchTermWithFields(fields, term, 'cross_fields')) + } + } +} + +function adjEqQueries ({ fields, relation, terms, term }) { + if (!['=', '==', 'adj'].includes(relation)) { return null } + const type = (relation === '==') ? 'exact' : 'phrase' + return matchTermWithFields(fields, term, type) +} + +// depending on the type of cql query supplied by the user, +// we may need to modify the es query from the type indicated by the index +// mapping. +// e.g. in case the user indicates a prefix query, all `term` queries should be +// mapped to `prefix` queries +// X represents field types that should be excluded e.g. for exact matching, +// exclude regular fields and use matching `exact_fields` instead +const esQueryMappingByCqlQueryType = { + exact: { term: 'term', prefix: 'prefix', fields: 'X', exact_fields: 'term' }, + prefix: { term: 'prefix', prefix: 'prefix', fields: 'X', exact_fields: 'prefix' }, + basic: { term: 'term', prefix: 'prefix', fields: 'multi_match', exact_fields: 'X' } +} + +// used to turn the above table inside out, e.g. +// in case of queryType = `prefix`, +// will gather together, for a given set of fields, all the query types that +// need to be included under `selector` +// so e.g. 
`term`, 'prefix', and `exact_fields` fields all need to be included +// in the `prefix` matcher, since they are all mapped to `prefix` in this case +const selectFields = (queryType, fields) => (selector) => { + return Object.entries(fields) + .filter(([fieldType, fieldNames]) => { + return esQueryMappingByCqlQueryType[queryType][fieldType] === selector + }) + .map(([fieldType, fieldNames]) => fieldNames) + .flat() +} + +function matchTermWithFields (fields, term, type) { + const queryType = term[0] === '^' ? 'prefix' : (type === 'exact' ? 'exact' : 'basic') + if (term[0] === '^') term = term.slice(1) + + const selector = selectFields(queryType, fields) + + const queries = [ + ...multiMatch(selector('multi_match'), term, type), + ...(selector('term').map(termField => termQuery(termField, term))), + ...(selector('prefix').map(prefixField => prefixQuery(prefixField, term))) + ] + + return { + bool: { + should: queries + } + } +} + +function dateQueries ({ fields, relation, terms, term }) { + if (!Object.values(fields).some(fieldType => fieldType.some(field => field.includes('date')))) { return null } + let range + switch (relation) { + case '<': + range = { lt: terms[0] } + break + case '>': + range = { gt: terms[0] } + break + case '>=': + range = { gte: terms[0] } + break + case '<=': + range = { lte: terms[0] } + break + case 'encloses': + range = { gt: terms[0], lt: terms[1] } + break + case 'within': + range = { gte: terms[0], lte: terms[1] } + break + default: + break + } + + return { + nested: { + path: 'dates', + query: { + range: { + 'dates.range': range + } + } + } + } +} + +function termQuery (field, term) { + return { term: { [field]: term } } +} + +function prefixQuery (field, term) { + return { prefix: { [field]: term } } +} + +function multiMatch (fields, term, type) { + if (!fields || !fields.length) return [] + + return [{ + multi_match: { + query: term, + fields, + type + } + }] +} + +module.exports = { + buildEsQuery, + buildEsQueryFromTree, + 
buildBoolean, + buildAtomic, + buildAtomicMain, + nestedFilterAndMap, + selectFields, + indexMapping +} diff --git a/lib/elasticsearch/elastic-query-builder.js b/lib/elasticsearch/elastic-query-builder.js index 70efa58f..d5e0d1b0 100644 --- a/lib/elasticsearch/elastic-query-builder.js +++ b/lib/elasticsearch/elastic-query-builder.js @@ -36,6 +36,8 @@ class ElasticQueryBuilder { case 'callnumber': this.buildCallnumberQuery() break + case 'cql': + break case 'all': default: this.buildAllQuery() @@ -181,7 +183,7 @@ class ElasticQueryBuilder { * Concat contributor + role if role param is provided */ applyContributorRole () { - this.query.addMust(termMatch('contributorRoleLiteral', this.request.params.filters.contributorLiteral + '||' + this.request.params.role)) + this.query.addMust(termMatch('browseableContributorRole_packed', this.request.params.filters.contributorLiteral + '||' + this.request.params.role)) } /** diff --git a/lib/jsonld_serializers.js b/lib/jsonld_serializers.js index 31081f7e..b3b1ca36 100644 --- a/lib/jsonld_serializers.js +++ b/lib/jsonld_serializers.js @@ -301,12 +301,14 @@ ResourceSerializer.getFormattedFormat = function (formatId) { ResourceSerializer.formatCollection = function (collectionId) { const prefLabel = nyplCore.collections()[`nyplCollection:${collectionId}`]?.label + const locationsPath = nyplCore.collections()[`nyplCollection:${collectionId}`]?.locationsPath const buildingLocationLabel = buildingLocations.find((loc) => loc.value === collectionId.slice(0, 2))?.label if (!prefLabel) return null return { '@id': collectionId, prefLabel, - buildingLocationLabel + buildingLocationLabel, + locationsPath } } @@ -374,10 +376,15 @@ class ItemResourceSerializer extends JsonLdItemSerializer { }) } + if (this.body.holdingLocation) { + stmts.holdingLocation = ItemResourceSerializer.getFormattedHoldingLocation(this.body.holdingLocation) + } + // Override default serialization of item.electronicLocator statements (full digital surrogates): if 
(this.body.electronicLocator) { stmts.electronicLocator = this.body.electronicLocator.map((link) => ResourceSerializer.formatElectronicResourceBlankNode(link, 'nypl:ElectronicLocation')) } + return stmts } @@ -386,6 +393,19 @@ class ItemResourceSerializer extends JsonLdItemSerializer { return (new ItemResourceSerializer(resp, options)).format() } + static getFormattedHoldingLocation (location) { + const loc = Array.isArray(location) ? location[0] : null + if (!loc) return [] + const locationId = loc['@id']?.split(':')[1] + const sierraLocations = nyplCore.sierraLocations() + const collectionAccessType = sierraLocations?.[locationId]?.collectionAccessType + return [{ + '@id': loc['@id'], + prefLabel: loc?.prefLabel, + collectionAccessType + }] + } + // Given an item, returns item with an added `identifier` // of form 'urn:[sourceIdentifierPrefix]:[sourceIdentifier]' // e.g. diff --git a/lib/resources.js b/lib/resources.js index 31177f59..5c6dd4e9 100644 --- a/lib/resources.js +++ b/lib/resources.js @@ -19,6 +19,8 @@ const { parseParams, deepValue } = require('../lib/util') const ApiRequest = require('./api-request') const ElasticQueryBuilder = require('./elasticsearch/elastic-query-builder') +const cqlQueryBuilder = require('./elasticsearch/cql_query_builder') +const { displayParsed } = require('./elasticsearch/cql_grammar') const { FILTER_CONFIG, SEARCH_SCOPES, AGGREGATIONS_SPEC } = require('./elasticsearch/config') const errors = require('./errors') @@ -46,6 +48,10 @@ const SORT_FIELDS = { initialDirection: 'asc', field: 'creator_sort' }, + callnumber: { + initialDirection: 'asc', + field: 'shelfMark.keywordLowercased' + }, relevance: {} } @@ -644,6 +650,22 @@ module.exports = function (app, _private = null) { app.logger.debug('Parsed params: ', params) + let parsed = {} + + if (params.search_scope === 'cql') { + try { + parsed = displayParsed(params.q) // ? + } catch (e) { + throw new IndexSearchError('Unknown parsing error. 
Error most likely near end of string') + } + if (parsed.error) { + throw new IndexSearchError(parsed.error) + } + if (!parsed.parsed) { + throw new IndexSearchError('Unknown parsing error. Error most likely near end of string') + } + } + let body = buildElasticBody(params) // Strip unnecessary _source fields @@ -651,7 +673,9 @@ module.exports = function (app, _private = null) { excludes: EXCLUDE_FIELDS.concat(['items']) } - body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + if (params.search_scope !== 'cql') { + body = addInnerHits(body, { merge_checkin_card_items: params.merge_checkin_card_items }) + } app.logger.debug('Resources#search', RESOURCES_INDEX, body) @@ -680,7 +704,8 @@ module.exports = function (app, _private = null) { resp.debug = { relevanceReport, - query: body + query: body, + ...parsed } return resp }) @@ -887,6 +912,10 @@ const buildElasticBody = function (params) { */ const buildElasticQuery = function (params) { const request = ApiRequest.fromParams(params) + if (params.search_scope === 'cql') { + const query = cqlQueryBuilder.buildEsQuery(params.q, request) + return query + } const builder = ElasticQueryBuilder.forApiRequest(request) return builder.query.toJson() diff --git a/lib/response_massager.js b/lib/response_massager.js index a1eb17ec..034580a5 100644 --- a/lib/response_massager.js +++ b/lib/response_massager.js @@ -1,6 +1,7 @@ const LocationLabelUpdater = require('./location_label_updater') const AvailabilityResolver = require('./availability_resolver.js') const parallelFieldsExtractor = require('./parallel-fields-extractor') +const displayFieldUnpacker = require('./display-field-unpacker') const { isAeonUrl, sortOnPropWithUndefinedLast } = require('../lib/util') const FulfillmentResolver = require('./fulfillment_resolver') const fixItemRequestability = require('./requestability_resolver') @@ -73,6 +74,9 @@ class ResponseMassager { // Rename parallel fields: response = 
parallelFieldsExtractor(response) + // Extract display values and labels from packed fields + response = displayFieldUnpacker(response) + // Update ES response with updated availability from SCSB: const updatedWithAvailability = (new AvailabilityResolver(response)) .responseWithUpdatedAvailability(options) diff --git a/lib/vocabularies.js b/lib/vocabularies.js index b7627e48..451f1e81 100644 --- a/lib/vocabularies.js +++ b/lib/vocabularies.js @@ -17,7 +17,8 @@ module.exports = function (app, _private = null) { (val) => ({ value: val.code, label: val.label, - holdingLocations: val.holdingLocations + holdingLocations: val.holdingLocations, + locationsPath: val.locationsPath }) ) return ({ formats, collections, languages: languages.values, buildingLocations }) diff --git a/package-lock.json b/package-lock.json index e142a8e2..0ba9982a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,10 +11,11 @@ "dependencies": { "@aws-sdk/client-kms": "^3.948.0", "@elastic/elasticsearch": "~8.12.0", - "@nypl/nypl-core-objects": "3.0.4", + "@nypl/nypl-core-objects": "3.0.5", "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" @@ -3588,9 +3589,9 @@ } }, "node_modules/@nypl/nypl-core-objects": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.4.tgz", - "integrity": "sha512-xFo14urEMO2VUL2/YcjG3io1deU/UCKtYlI2ADCOTEwOcCpwzzvkvOvTI2E2CvgstkHplVejBjdvd4l6ak0inQ==", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.5.tgz", + "integrity": "sha512-XXQ15ekqBgn/XX9eNmU40EE+OpIYSyTkq/TdAEhmTRiHWC+tohT8cfDNABz8lH+xnuqiwam6Fmvla3g5KI2DHQ==", "dependencies": { "axios": "^1.6.8", "csv": "^5.3.2", @@ -6194,6 +6195,15 @@ "integrity": 
"sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", "devOptional": true }, + "node_modules/ebnf": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ebnf/-/ebnf-1.9.1.tgz", + "integrity": "sha512-uW2UKSsuty9ANJ3YByIQE4ANkD8nqUPO7r6Fwcc1ADKPe9FRdcPpMl3VEput4JSvKBJ4J86npIC2MLP0pYkCuw==", + "license": "MIT", + "bin": { + "ebnf": "dist/bin.js" + } + }, "node_modules/ecc-jsbn": { "version": "0.1.2", "dev": true, @@ -7245,9 +7255,9 @@ "license": "MIT" }, "node_modules/follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", "funding": [ { "type": "individual", @@ -10379,6 +10389,27 @@ "node": ">=10" } }, + "node_modules/research-catalog-indexer/node_modules/@nypl/nypl-core-objects": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@nypl/nypl-core-objects/-/nypl-core-objects-3.0.4.tgz", + "integrity": "sha512-xFo14urEMO2VUL2/YcjG3io1deU/UCKtYlI2ADCOTEwOcCpwzzvkvOvTI2E2CvgstkHplVejBjdvd4l6ak0inQ==", + "dependencies": { + "axios": "^1.6.8", + "csv": "^5.3.2", + "csv-stringify": "^5.6.0", + "just-flatten": "^1.0.0" + } + }, + "node_modules/research-catalog-indexer/node_modules/@nypl/nypl-core-objects/node_modules/axios": { + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.6.tgz", + "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, 
"node_modules/research-catalog-indexer/node_modules/@nypl/scsb-rest-client": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/@nypl/scsb-rest-client/-/scsb-rest-client-2.0.0.tgz", @@ -10397,6 +10428,11 @@ "form-data": "^4.0.0" } }, + "node_modules/research-catalog-indexer/node_modules/csv-stringify": { + "version": "5.6.5", + "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-5.6.5.tgz", + "integrity": "sha512-PjiQ659aQ+fUTQqSrd1XEDnOr52jh30RBurfzkscaE2tPaFsDH5wOAHJiw8XAHphRknCwMUE9KRayc4K/NbO8A==" + }, "node_modules/research-catalog-indexer/node_modules/debug": { "version": "4.3.7", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", diff --git a/package.json b/package.json index 3383b465..48803e25 100644 --- a/package.json +++ b/package.json @@ -4,10 +4,11 @@ "dependencies": { "@aws-sdk/client-kms": "^3.948.0", "@elastic/elasticsearch": "~8.12.0", - "@nypl/nypl-core-objects": "3.0.4", + "@nypl/nypl-core-objects": "3.0.5", "@nypl/nypl-data-api-client": "^2.0.0", "@nypl/scsb-rest-client": "3.0.0", "dotenv": "^16.4.5", + "ebnf": "^1.9.1", "express": "^4.18.3", "research-catalog-indexer": "git+https://github.com/NYPL/research-catalog-indexer.git#a292adff0d3ed594a0c2996561d9e17b0f9dc04a", "winston": "3.12.0" diff --git a/test/cql_grammar.test.js b/test/cql_grammar.test.js new file mode 100644 index 00000000..24d5d5d6 --- /dev/null +++ b/test/cql_grammar.test.js @@ -0,0 +1,110 @@ +const { expect } = require('chai') + +const { simplify, parseWithRightCql } = require('../lib/elasticsearch/cql_grammar') + +function validateAtomicQuery (parsed, scope, relation, quotedTerm) { + expect(parsed.type).to.equal('query') + expect(parsed.children.length).to.equal(1) + const subQuery = parsed.children[0] + expect(subQuery.type).to.equal('sub_query') + expect(subQuery.children.length).to.equal(1) + const atomicQuery = subQuery.children[0] + expect(atomicQuery.type).to.equal('atomic_query') + const scopeNode = 
atomicQuery.children.find(child => child.type === 'scope') + const scopeTerm = scopeNode.children.find(child => child.type === 'scope_term') + expect(scopeTerm.text).to.equal(scope) + const relationNode = atomicQuery.children.find(child => child.type === 'relation') + const relationTerm = relationNode.children.find(child => child.type === 'relation_term') + expect(relationTerm.text).to.equal(relation) + const quotedTermNode = atomicQuery.children.find(child => child.type === 'quoted_term') + expect(quotedTermNode.text).to.equal(quotedTerm) +} + +describe('CQL Grammar', function () { + describe('parsing queries', function () { + it('parses atomic queries', function () { + validateAtomicQuery(parseWithRightCql('title="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('keyword any "hamlet shakespeare"'), 'keyword', 'any', '"hamlet shakespeare"') + validateAtomicQuery(parseWithRightCql('subject all "hamlet shakespeare"'), 'subject', 'all', '"hamlet shakespeare"') + }) + + it('allows whitespace variants', function () { + validateAtomicQuery(parseWithRightCql('title ="hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title= "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('title = "hamlet"'), 'title', '=', '"hamlet"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', '"shakespeare"') + validateAtomicQuery(parseWithRightCql('author adj "shakespeare"'), 'author', 'adj', 
'"shakespeare"') + }) + + it('correctly escapes escape characters', function () { + validateAtomicQuery(parseWithRightCql('keyword="Notes on \\"The Underground\\""'), 'keyword', '=', '"Notes on \\"The Underground\\""') + validateAtomicQuery(parseWithRightCql('title="This title ends in a slash \\\\"'), 'title', '=', '"This title ends in a slash \\\\"') + }) + + it('identifies words correctly', function () { + const parsed = parseWithRightCql('keyword adj "A multiword keyword"') + const words = [] + let nodes = [parsed] + while (nodes.length) { + const node = nodes.shift() + if (node.type === 'word') { + words.push(node.text) + } else { + nodes = nodes.concat(node.children) + } + } + const expectedWords = ['A', 'multiword', 'keyword'] + words.forEach(word => { + expect(expectedWords).to.include(word) + }) + expect(words.length).to.equal(3) + }) + + it('parses boolean queries', function () { + expect(simplify(parseWithRightCql( + 'title="dogs" AND keyword="cats"' + ))).to.deep.equal( + [['title', '=', ['dogs']], 'AND', ['keyword', '=', ['cats']]] + ) + + expect(simplify(parseWithRightCql( + 'title="dogs" AND keyword="cats" OR author adj "Bird"' + ))).to.deep.equal( + [ + [ + [ + 'title', '=', ['dogs'] + ], + 'AND', + [ + 'keyword', '=', ['cats'] + ] + ], + 'OR', + [ + 'author', 'adj', ['Bird'] + ] + ] + ) + }) + + it('parses queries with parentheses', function () { + expect(simplify(parseWithRightCql( + 'title="dogs" AND (keyword="cats" OR author adj "Bird")' + ))) + .to.deep.equal( + [ + ['title', '=', ['dogs']], 'AND', [['keyword', '=', ['cats']], + 'OR', + ['author', 'adj', ['Bird']] + ] + ] + ) + }) + }) +}) diff --git a/test/cql_query_builder.test.js b/test/cql_query_builder.test.js new file mode 100644 index 00000000..cf856b5c --- /dev/null +++ b/test/cql_query_builder.test.js @@ -0,0 +1,199 @@ +const { expect } = require('chai') + +const { buildEsQuery } = require('../lib/elasticsearch/cql_query_builder') +const ApiRequest = require('../lib/api-request') +const 
{ + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses, + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery, + multiAdjQuery, + exactMatchQuery +} = require('./fixtures/cql_fixtures') + +describe('CQL Query Builder', function () { + it('Simple = query', function () { + expect(buildEsQuery('title="Hamlet"')) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Simple adj query', function () { + expect(buildEsQuery('title adj "Hamlet"')) + .to.deep.equal( + simpleAdjQuery + ) + }) + + it('Multi-word adj query', function () { + expect(buildEsQuery('title adj "Hamlet, Prince"')) + .to.deep.equal( + multiAdjQuery + ) + }) + + it('Simple any query', function () { + expect(buildEsQuery('title any "Hamlet Othello"')) + .to.deep.equal( + simpleAnyQuery + ) + }) + + it('Simple all query', function () { + expect(buildEsQuery('title all "Hamlet Othello"')) + .to.deep.equal( + simpleAllQuery + ) + }) + + it('Prefix phrase query', function () { + expect(buildEsQuery('title = "^The Tragedy of Hamlet, Prince of Denmark"')) + .to.deep.equal( + prefixPhraseQuery + ) + }) + + it('Prefix queries mixed into any query', function () { + expect(buildEsQuery('title any "^Tragedy ^Comedy Hamlet Othello"')) + .to.deep.equal( + anyWithPrefixQuery + ) + }) + + it('Keyword query for barcode', function () { + expect(buildEsQuery('keyword = "123456"')) + .to.deep.equal( + keywordQueryForBarcode + ) + }) + + it('Keyword query for shelfMark', function () { + expect(buildEsQuery('keyword = "B 12"')) + .to.deep.equal( + keywordQueryForShelfMark + ) + }) + + it('Keyword query for general term', function () { + expect(buildEsQuery('keyword = "Hamlet"')) + .to.deep.equal( + 
keywordQueryForGeneralTerm + ) + }) + + it('Identifier query', function () { + expect(buildEsQuery('identifier = "b1234"')) + .to.deep.equal( + identifierQuery + ) + }) + + it('Binary boolean query', function () { + expect(buildEsQuery('author = "Shakespeare" AND language = "English"')) + .to.deep.equal( + binaryBooleanQuery + ) + }) + + it('Ternary boolean query', function () { + expect(buildEsQuery('author = "Shakespeare" AND language = "English" OR genre = "tragedy"')) + .to.deep.equal( + ternaryBooleanQuery + ) + }) + + it('Boolean query with parentheses', function () { + expect(buildEsQuery('author = "Shakespeare" AND (language = "English" OR genre = "tragedy")')) + .to.deep.equal( + queryWithParentheses + ) + }) + + it('Query with NOT', function () { + expect(buildEsQuery('author = "Shakespeare" NOT language = "English"')) + .to.deep.equal( + negationQuery + ) + }) + + it('Query with AND NOT', function () { + expect(buildEsQuery('author = "Shakespeare" AND NOT language = "English"')) + .to.deep.equal( + negationQuery + ) + }) + + it('Date after query', function () { + expect(buildEsQuery('date > "1990"')) + .to.deep.equal( + dateAfterQuery + ) + }) + + it('Date after or on query', function () { + expect(buildEsQuery('date >= "1990"')) + .to.deep.equal( + dateAfterOrOnQuery + ) + }) + + it('Date before query', function () { + expect(buildEsQuery('date < "1990"')) + .to.deep.equal( + dateBeforeQuery + ) + }) + + it('Date dateBeforeOrOnQuery query', function () { + expect(buildEsQuery('date <= "1990"')) + .to.deep.equal( + dateBeforeOrOnQuery + ) + }) + + it('Date within query', function () { + expect(buildEsQuery('date within "1990 2000"')) + .to.deep.equal( + dateWithinQuery + ) + }) + + it('Date encloses query', function () { + expect(buildEsQuery('date encloses "1990 2000"')) + .to.deep.equal( + dateEnclosesQuery + ) + }) + + it('Query with applied filters', function () { + const apiRequest = new ApiRequest({ filters: { language: ['Klingon'] }, search_scope: 
'cql' }) + expect(buildEsQuery('author="Shakespeare"', apiRequest)) + .to.deep.equal( + filterQuery + ) + }) + + it('Exact match query', function () { + expect(buildEsQuery('author == "William Shakespeare"')) + .to.deep.equal( + exactMatchQuery + ) + }) +}) diff --git a/test/display-field-unpacker.test.js b/test/display-field-unpacker.test.js new file mode 100644 index 00000000..3f768393 --- /dev/null +++ b/test/display-field-unpacker.test.js @@ -0,0 +1,22 @@ +const { expect } = require('chai') +const displayFieldsUnpacker = require('../lib/display-field-unpacker') +const packedDisplayBib = require('./fixtures/packed-display-response.json') + +describe('Display field unpacker', () => { + describe('When a bib has a packed display property', () => { + it('adds each of the items in that array as unpacked objects', () => { + const displayFieldsUnpacked = displayFieldsUnpacker(packedDisplayBib).hits.hits[0]._source + expect(Object.keys(displayFieldsUnpacked).length).to.equal(2) + expect(displayFieldsUnpacked).to.deep.equal({ + testDisplay: [ + { value: 'someValue', display: 'someDisplay' }, + { value: 'someValueB', display: 'someDisplayB' }, + { value: 'someValueC', display: null } + ], + testOtherDisplay: [ + { value: 'otherValue', display: 'otherDisplay' } + ] + }) + }) + }) +}) diff --git a/test/elastic-query-builder.test.js b/test/elastic-query-builder.test.js index 48c08440..062a9ec0 100644 --- a/test/elastic-query-builder.test.js +++ b/test/elastic-query-builder.test.js @@ -272,7 +272,7 @@ describe('ElasticQueryBuilder', () => { const inst = ElasticQueryBuilder.forApiRequest(request) expect(inst.query.toJson()).to.nested - .include({ 'bool.must[0].term.contributorRoleLiteral.value': 'Patinkin, Mandy||performer.' }) + .include({ 'bool.must[0].term.browseableContributorRole_packed.value': 'Patinkin, Mandy||performer.' 
}) }) }) diff --git a/test/fixtures/cql_fixtures.js b/test/fixtures/cql_fixtures.js new file mode 100644 index 00000000..552333c0 --- /dev/null +++ b/test/fixtures/cql_fixtures.js @@ -0,0 +1,1180 @@ +const simpleAdjQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const multiAdjQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet, Prince', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const prefixPhraseQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'seriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'titleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'donor.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelTitleAlt.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + 
{ + prefix: { + 'parallelSeriesStatement.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelCreatorLiteral.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'uniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'parallelUniformTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + 'addedAuthorTitle.raw': 'The Tragedy of Hamlet, Prince of Denmark' + } + }, + { + prefix: { + placeOfPublication: 'The Tragedy of Hamlet, Prince of Denmark' + } + } + ] + } + } + ] + } + } + ] + } +} + +const simpleAnyQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const anyWithPrefixQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Tragedy' + } + }, + { + 
prefix: { 'seriesStatement.raw': 'Tragedy' } + }, + { prefix: { 'titleAlt.raw': 'Tragedy' } }, + { prefix: { 'donor.raw': 'Tragedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Tragedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Tragedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Tragedy' } + }, + { prefix: { 'uniformTitle.raw': 'Tragedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Tragedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Tragedy' } + }, + { prefix: { placeOfPublication: 'Tragedy' } } + ] + } + }, + { + bool: { + should: [ + { + prefix: { + 'title.keywordLowercasedStripped': 'Comedy' + } + }, + { prefix: { 'seriesStatement.raw': 'Comedy' } }, + { prefix: { 'titleAlt.raw': 'Comedy' } }, + { prefix: { 'donor.raw': 'Comedy' } }, + { + prefix: { 'parallelTitleAlt.raw': 'Comedy' } + }, + { + prefix: { 'parallelSeriesStatement.raw': 'Comedy' } + }, + { + prefix: { 'parallelCreatorLiteral.raw': 'Comedy' } + }, + { prefix: { 'uniformTitle.raw': 'Comedy' } }, + { + prefix: { 'parallelUniformTitle.raw': 'Comedy' } + }, + { + prefix: { 'addedAuthorTitle.raw': 'Comedy' } + }, + { prefix: { placeOfPublication: 'Comedy' } } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 
'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const simpleAllQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + }, + { + bool: { + should: [ + { + multi_match: { + query: 'Othello', + fields: [ + 'title', + 'title.folded', + 'titleAlt.folded', + 'uniformTitle.folded', + 'titleDisplay.folded', + 'seriesStatement.folded', + 'contentsTitle.folded', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelSeriesStatement.folded', + 'parallelTitleAlt.folded', + 'parallelCreatorLiteral.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle' + ], + type: 'cross_fields' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const keywordQueryForBarcode = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: '123456', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 
'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [{ term: { 'items.idBarcode': '123456' } }] + } + } + } + } + ] + } + } + ] + } +} + +const keywordQueryForShelfMark = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { + multi_match: { + query: 'B 12', + fields: ['items.shelfMark'], + type: 'phrase' + } + } + ] + } + } + } + } + ] + } + } + ] + } +} + +const keywordQueryForGeneralTerm = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Hamlet', + fields: [ + 'title', + 'title.folded', + 'description.foldedStemmed', + 'subjectLiteral', + 'subjectLiteral.folded', + 'creatorLiteral', + 
'creatorLiteral.folded', + 'contributorLiteral.folded', + 'note.label.foldedStemmed', + 'publisherLiteral.folded', + 'seriesStatement.folded', + 'titleAlt.folded', + 'titleDisplay.folded', + 'contentsTitle.folded', + 'tableOfContents.folded', + 'genreForm', + 'donor.folded', + 'parallelTitle.folded', + 'parallelTitleDisplay.folded', + 'parallelTitleAlt.folded', + 'parallelSeriesStatement.folded', + 'parallelCreatorLiteral.folded', + 'parallelPublisher', + 'parallelPublisherLiteral', + 'uniformTitle.folded', + 'parallelUniformTitle', + 'formerTitle', + 'addedAuthorTitle', + 'placeOfPublication.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } +} + +const identifierQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { uri: 'b1234' } }, + { term: { 'idIsbn.clean': 'b1234' } }, + { term: { 'idIssn.clean': 'b1234' } }, + { prefix: { 'identifierV2.value': 'b1234' } } + ] + } + }, + { + nested: { + path: 'items', + query: { + bool: { + should: [ + { term: { 'items.idBarcode': 'b1234' } }, + { + prefix: { + 'items.shelfMark.keywordLowercased': 'b1234' + } + } + ] + } + } + } + } + ] + } + } + ] + } +} + +const binaryBooleanQuery = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const ternaryBooleanQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 
'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: ['genreForm'], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const queryWithParentheses = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + }, + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'tragedy', + fields: ['genreForm'], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const negationQuery = { + bool: { + should: [ + { + bool: { + must: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ], + must_not: [ + { + bool: { + should: [ + { + bool: { + should: [ + { term: { 'language.id': 'English' } }, + { term: { 'language.label': 'English' } } + ] + } + } + ] + } + } + ] + } + } + ] + } +} + +const dateAfterQuery = { + bool: { + should: [ + { + bool: { + should: 
[ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gt: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateBeforeQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lt: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateBeforeOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { lte: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateAfterOrOnQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { range: { 'dates.range': { gte: '1990' } } } + } + } + ] + } + } + ] + } +} + +const dateWithinQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gte: '1990', lte: '2000' } } + } + } + } + ] + } + } + ] + } +} + +const dateEnclosesQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + nested: { + path: 'dates', + query: { + range: { 'dates.range': { gt: '1990', lt: '2000' } } + } + } + } + ] + } + } + ] + } +} + +const filterQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + multi_match: { + query: 'Shakespeare', + fields: [ + 'creatorLiteral', + 'creatorLiteral.folded', + 'contributorLiteral.folded', + 'parallelCreatorLiteral.folded', + 'parallelContributorLiteral.folded' + ], + type: 'phrase' + } + } + ] + } + } + ] + } + } + ], + filter: [ + { + bool: { + should: [ + { term: { 'language.id': 'Klingon' } }, + { term: { 'language.label': 'Klingon' } } + ] + } + } + ] + } +} + +const exactMatchQuery = { + bool: { + should: [ + { + bool: { + should: [ + { + bool: { + should: [ + { + term: { + 'creatorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'contributorLiteral.keywordLowercased': 'William Shakespeare' + } + }, + { + term: { + 'parallelCreatorLiteral.raw': 'William Shakespeare' + } + 
}, + { + term: { + 'parallelContributorLiteral.raw': 'William Shakespeare' + } + } + ] + } + } + ] + } + } + ] + } +} + +module.exports = { + simpleAdjQuery, + simpleAnyQuery, + simpleAllQuery, + prefixPhraseQuery, + anyWithPrefixQuery, + keywordQueryForBarcode, + keywordQueryForShelfMark, + keywordQueryForGeneralTerm, + identifierQuery, + binaryBooleanQuery, + ternaryBooleanQuery, + queryWithParentheses, + negationQuery, + dateBeforeQuery, + dateBeforeOrOnQuery, + dateAfterQuery, + dateAfterOrOnQuery, + dateWithinQuery, + dateEnclosesQuery, + filterQuery, + multiAdjQuery, + exactMatchQuery +} diff --git a/test/fixtures/packed-display-response.json b/test/fixtures/packed-display-response.json new file mode 100644 index 00000000..3a304c50 --- /dev/null +++ b/test/fixtures/packed-display-response.json @@ -0,0 +1,18 @@ +{ + "hits": { + "hits": [ + { + "_source": { + "test_displayPacked": [ + "someValue||someDisplay", + "someValueB||someDisplayB", + "someValueC" + ], + "testOther_displayPacked": [ + "otherValue||otherDisplay" + ] + } + } + ] + } +} diff --git a/test/integration/delivery-locations-by-barcode.test.js b/test/integration/delivery-locations-by-barcode.test.js index 2302b12b..23245370 100644 --- a/test/integration/delivery-locations-by-barcode.test.js +++ b/test/integration/delivery-locations-by-barcode.test.js @@ -1,17 +1,25 @@ -require('dotenv').config('config/qa.env') -const axios = require('axios') +const { loadConfig } = require('../../lib/load-config') const { expectations, ptypes } = require('./delivery-locations-constants') +const { makeNyplDataApiClient } = require('../../lib/data-api-client') const checkLocationsForPtype = async (ptype) => { const problems = [] const match = [] - await Promise.all(Object.values(expectations).map(async (expectation) => { - const deliveryLocationsFromApi = await getDeliveryLocations(expectation.barcode, ptypes[ptype]) + + await Promise.all(Object.entries(expectations).map(async ([holdingLocation, expectation], i) => 
{ + let deliveryLocationsFromApi let totalMatch = true const registerProblem = (problem) => { - problems.push({ barcode: expectation.barcode, deliveryLocationsFromApi, ...problem }) + problems.push({ holdingLocation, barcode: expectation.barcode, deliveryLocationsFromApi, ...problem }) totalMatch = false } + try { + deliveryLocationsFromApi = await getDeliveryLocations(expectation.barcode, ptypes[ptype]) + } catch (e) { + registerProblem({ lookUpFailed: true }) + return + } + const checkForValue = (expectedValue, action) => { const includedValueIncluded = deliveryLocationsFromApi.some((label) => label.includes(expectedValue)) const match = action === 'include' ? includedValueIncluded : !includedValueIncluded @@ -27,20 +35,32 @@ const checkLocationsForPtype = async (ptype) => { } const getDeliveryLocations = async (barcode, patronId) => { - const { data: { itemListElement: deliveryLocationsPerRecord } } = await axios.get(`http://localhost:8082/api/v0.1/request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) - // per record - return deliveryLocationsPerRecord[0] - .deliveryLocation.map(loc => loc.prefLabel.toLowerCase()) + try { + const { itemListElement: itemData } = await makeNyplDataApiClient().get(`request/deliveryLocationsByBarcode?barcodes[]=${barcode}&patronId=${patronId}`) + // per record + return itemData[0] + .deliveryLocation.map(loc => loc.prefLabel.toLowerCase()) + } catch (e) { + console.error(e) + // Re-throw after logging so the caller's try/catch can register a lookUpFailed problem instead of receiving undefined + throw e + } } const theThing = async () => { + await loadConfig() const results = await Promise.all(Object.keys(ptypes).map((checkLocationsForPtype))) - Object.keys(ptypes).forEach((ptype, i) => { + const resultsHaveProblems = Object.keys(ptypes).some((ptype, i) => { const resultsForPtype = results[i] if (resultsForPtype.problems.length) { console.error(`Error with ${ptype} ptype delivery results, `, resultsForPtype.problems) - } else console.log(`All delivery location checks for ${ptype} patron type successful`) + return true + } else { +
console.log(`All delivery location checks for ${ptype} patron type successful`) + return false + } }) + if (resultsHaveProblems) throw new Error('Delivery location checks failed.') } theThing() diff --git a/test/integration/delivery-locations-constants.js b/test/integration/delivery-locations-constants.js index b46d812c..6b70cfd7 100644 --- a/test/integration/delivery-locations-constants.js +++ b/test/integration/delivery-locations-constants.js @@ -38,11 +38,11 @@ const expectations = { scholar: { includes: [schomburg], excludes: [scholar, sasb, lpa] }, general: { includes: [schomburg], excludes: [scholar, sasb, lpa] } }, - // nyplM1: { - // barcode: null, - // scholar: { includes: [sasb], excludes: [scholar, lpa, schomburg] }, - // general: { includes: [sasb], excludes: [scholar, lpa, schomburg] } - // }, + nyplM1: { + barcode: '33433084847221', + scholar: { includes: [sasb, scholar], excludes: [lpa, schomburg] }, + general: { includes: [sasb], excludes: [scholar, lpa, schomburg] } + }, nyplM2: { barcode: '33333069027734', scholar: { includes: [sasb, scholar], excludes: [lpa, schomburg] }, diff --git a/test/item-resource-serializer.test.js b/test/item-resource-serializer.test.js index 57429231..3387c8c8 100644 --- a/test/item-resource-serializer.test.js +++ b/test/item-resource-serializer.test.js @@ -60,7 +60,19 @@ describe('ItemResourceSerializer', () => { expect(doc.idNyplSourceId['@value']).to.eq('9876543210') }) }) - + describe('getFormattedHoldingLocation', () => { + it('should return holding location with id, label, and collection access type', () => { + const locationEntity = ItemResourceSerializer.getFormattedHoldingLocation([ + { + '@id': 'loc:maff1', + prefLabel: 'Schwarzman Building - Dorot Jewish Division Reference Room 111' + } + ]) + expect(locationEntity[0].prefLabel).to.equal('Schwarzman Building - Dorot Jewish Division Reference Room 111') + expect(locationEntity[0]['@id']).to.equal('loc:maff1') + 
expect(locationEntity[0].collectionAccessType).to.equal('shelf') + }) + }) describe('addSourceIdentifier', () => { it('adds source identifier for NYPL', async () => { const item = { uri: 'i1234' } diff --git a/test/resource_serializer.test.js b/test/resource_serializer.test.js index be0427e6..133671ae 100644 --- a/test/resource_serializer.test.js +++ b/test/resource_serializer.test.js @@ -9,11 +9,13 @@ describe('Resource Serializer', () => { { '@id': 'mal', buildingLocationLabel: 'Stephen A. Schwarzman Building (SASB)', + locationsPath: 'locations/schwarzman/general-research-division', prefLabel: 'General Research Division' }, { '@id': 'bur', buildingLocationLabel: 'Stavros Niarchos Foundation Library (SNFL)', + locationsPath: 'locations/snfl/yoseloff-business', prefLabel: 'Yoseloff Business Center' } ]) @@ -30,6 +32,7 @@ describe('Resource Serializer', () => { expect(collectionEntity.prefLabel).to.equal('Art & Architecture Collection') expect(collectionEntity['@id']).to.equal('mab') expect(collectionEntity.buildingLocationLabel).to.equal('Stephen A. Schwarzman Building (SASB)') + expect(collectionEntity.locationsPath).to.equal('locations/schwarzman/wallach-division/art-architecture-collection') }) }) describe('.formatItemFilterAggregations()', () => { diff --git a/test/vocabularies.test.js b/test/vocabularies.test.js index d3bdb48d..81c87a6e 100644 --- a/test/vocabularies.test.js +++ b/test/vocabularies.test.js @@ -57,6 +57,6 @@ describe('Vocabularies', function () { const results = await app.vocabularies({}, { baseUrl: app.baseUrl }) - expect(results.collections[0]).to.have.keys(['value', 'label', 'holdingLocations']) + expect(results.collections[0]).to.have.keys(['value', 'label', 'holdingLocations', 'locationsPath']) }) })