Skip to content
3 changes: 3 additions & 0 deletions app.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const express = require('express')
const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper')
const ControlledVocabularies = require('./lib/models/ControlledVocabularies')

const esClient = require('./lib/elasticsearch/client')
const loadConfig = require('./lib/load-config')
Expand Down Expand Up @@ -70,6 +71,8 @@ app.init = async () => {
handleError(err, req, res, next, app.logger)
})

await ControlledVocabularies.initialize(app)

return app
}

Expand Down
1 change: 1 addition & 0 deletions lib/elasticsearch/cql/index-mapping.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ const indexMapping = {
]
},
callnumber: {
fields: ['shelfMark'],
term: ['shelfMark.keywordLowercased', 'items.shelfMark.keywordLowercased']
},
identifier: {
Expand Down
52 changes: 29 additions & 23 deletions lib/elasticsearch/cql_grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,39 @@ function reverseGrammar (grammar) {
// EBNF grammar text for CQL search queries, written in left-recursive form:
// the `query` and `phrase` rules (and `word` / `unquoted_word`) begin with
// themselves. This string is handed to makeCaseInsensitiveLiterals and
// reverseGrammar further down in this file to derive the grammar that is
// actually used for parsing.
//
// Character codes used in the classes below: #x22 is the double quote,
// #x5c the backslash, #x28 / #x29 the parentheses, and #x20 #x09 #x0A #x0D
// are space, tab, line feed and carriage return.
//
// "AND NOT" is listed before "AND" in `connective` — presumably so the longer
// literal is tried first; confirm against the parser's alternation semantics.
//
// NOTE(review): `sub_query`, `atomic_query` and `phrase` are each defined
// twice below (an older and a newer form). This looks like two revisions
// merged together — confirm which definition is intended and drop the other.
const leftCql = `
query ::= query whitespace connective whitespace sub_query | sub_query
connective ::= "AND NOT" | "AND" | "OR" | "NOT"
sub_query ::= atomic_query | "(" query ")"
atomic_query ::= scope relation quoted_term
sub_query ::= atomic_query | lparen_space query rparen_space
atomic_query ::= scope relation search_term
search_term ::= quoted_term | unquoted_word
scope ::= scope_term whitespace | scope_term
relation ::= relation_term whitespace | relation_term
scope_term ::= "title" | "author" | "keyword" | "callnumber" | "identifier" | "subject" | "language" | "date" | "series"| "genre" | "center" | "division" | "format"
relation_term ::= "any" | "adj" | "all" | "<=" | ">=" | "<" | ">" | "==" | "=" | "within" | "encloses"
quoted_term ::= quote phrase quote
phrase ::= phrase whitespace word | word
phrase ::= phrase whitespace_or_word | whitespace_or_word
whitespace_or_word ::= whitespace | word
whitespace ::= [#x20#x09#x0A#x0D]+
word ::= word escaped_char | word regular_char | escaped_char | regular_char
regular_char ::= [^#x22#x5c#x20#x09#x0A#x0D]
unquoted_word ::= unquoted_word escaped_char | unquoted_word unquoted_char | escaped_char | unquoted_char
unquoted_char ::= [^#x22#x5c#x20#x09#x0A#x0D=<>()]
escaped_char ::= slash char
slash ::= [#x5c]
char ::= [a-z]|[^a-z]
quote ::= [#x22]
lparen_space ::= lparen whitespace | lparen
rparen_space ::= whitespace rparen | rparen
lparen ::= [#x28]
rparen ::= [#x29]
`
/**
 * Rewrites quoted grammar literals so they match regardless of letter case.
 *
 * Only double-quoted literals made up entirely of ASCII letters and spaces
 * are touched (e.g. "and not"); literals containing any other character,
 * such as "<=" or "(", are left exactly as written.
 *
 * Each letter becomes a two-character class (`a` -> `[aA]`) and each space
 * becomes a reference to the `whitespace` rule; the pieces are joined with
 * single spaces and the surrounding quotes are dropped.
 *
 * @param {string} grammar - EBNF grammar text.
 * @returns {string} The grammar with alphabetic literals expanded.
 */
function makeCaseInsensitiveLiterals (grammar) {
  const alphabeticLiteral = /"([a-zA-Z ]+)"/g
  const eitherCase = (letter) => `[${letter.toLowerCase()}${letter.toUpperCase()}]`

  return grammar.replace(alphabeticLiteral, (_quoted, literal) => {
    const pieces = []
    for (const ch of literal) {
      pieces.push(ch === ' ' ? 'whitespace' : eitherCase(ch))
    }
    return pieces.join(' ')
  })
}

const rightCql = reverseGrammar(leftCql)
const processedLeftCql = makeCaseInsensitiveLiterals(leftCql)
const rightCql = reverseGrammar(processedLeftCql)

function simplify (ast) {
switch (ast.type) {
Expand All @@ -56,14 +71,19 @@ function simplify (ast) {
return ast.text
case 'relation_term':
return ast.text
case 'search_term':
return simplify(ast.children.find(child => child.type.includes('quoted_term') || child.type.includes('word')))
case 'quoted_term':
return simplify(ast.children.find(child => child.type.includes('phrase')))
case 'phrase': {
const word = ast.children.find(child => child.type === 'word')
const word = ast.children.find(child => child.type === 'whitespace_or_word')
const phrase = ast.children.find(child => child.type === 'phrase')
return [simplify(word)].concat(phrase ? simplify(phrase) : [])
return [simplify(word)].filter(x => x).concat(phrase ? simplify(phrase) : [])
}
case 'whitespace_or_word':
return simplify(ast.children.find(child => child.type === 'word'))
case 'word':
case 'unquoted_word':
return ast.text
default:
break
Expand Down Expand Up @@ -94,12 +114,12 @@ function parseWithRightCql (string) {

function parsedASTtoNestedArray (ast) {
if (!ast.type.includes('query')) {
return reverseString(ast.text)
return ast.text.trim()
}

const childTypes = [
'atomic_query', 'sub_query', 'query', 'connective',
'scope', 'relation', 'quoted_term'
'scope', 'relation', 'search_term'
]

const children = ast.children
Expand All @@ -113,18 +133,4 @@ function parsedASTtoNestedArray (ast) {
return children
}

/**
 * Parses a CQL string and returns a display-friendly summary of the result.
 *
 * The parser output is not reversed back by `parseWithRightCql`, so the
 * unparsed remainder attached to each error is reversed here before it is
 * placed in the message.
 *
 * @param {string} string - The CQL query text to parse.
 * @returns {{}|{error: string}|{parsed: *}} An empty object when the parser
 *   yields nothing, `{ error }` with one line per parse error, or `{ parsed }`
 *   holding the result of `parsedASTtoNestedArray`.
 */
function displayParsed (string) {
  const ast = parseWithRightCql(string)
  if (!ast) return {}

  if (!ast.errors.length) {
    return { parsed: parsedASTtoNestedArray(ast) }
  }

  const messages = ast.errors.map((parseError) => {
    const remainder = reverseString(parseError.token.rest)
    return `Parsing error likely near end of "${remainder}"`
  })
  return { error: messages.join('\n') }
}

module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, displayParsed }
module.exports = { simplify, reverseAST, reverseGrammar, parseRight, parseWithRightCql, rightCqlParser, reverseString, parsedASTtoNestedArray }
Loading
Loading