From ca9e38fca803bf8c3376ae3a0acd147cd5f92d79 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 13 Dec 2019 16:08:24 +0100 Subject: [PATCH 1/3] feat(explicitly_set_analyzers): require "analyzer" & "search_analyzer" to be set for all text fields --- mappings/document.js | 12 ++++- mappings/partial/admin.json | 2 + mappings/partial/postalcode.json | 4 +- test/compile.js | 70 ++++++++++++++++++++++--- test/document.js | 11 ++++ test/fixtures/expected.json | 88 ++++++++++++++++++++++++++++---- 6 files changed, 168 insertions(+), 19 deletions(-) diff --git a/mappings/document.js b/mappings/document.js index f1f88b7c..723fb429 100644 --- a/mappings/document.js +++ b/mappings/document.js @@ -26,26 +26,32 @@ var schema = { name: { type: 'text', analyzer: 'keyword', + search_analyzer: 'keyword' }, unit: { type: 'text', analyzer: 'peliasUnit', + search_analyzer: 'peliasUnit' }, number: { type: 'text', analyzer: 'peliasHousenumber', + search_analyzer: 'peliasHousenumber' }, street: { type: 'text', analyzer: 'peliasStreet', + search_analyzer: 'peliasStreet' }, cross_street: { type: 'text', analyzer: 'peliasStreet', + search_analyzer: 'peliasStreet' }, zip: { type: 'text', analyzer: 'peliasZip', + search_analyzer: 'peliasZip' }, } }, @@ -152,7 +158,8 @@ var schema = { match_mapping_type: 'string', mapping: { type: 'text', - analyzer: 'peliasIndexOneEdgeGram' + analyzer: 'peliasIndexOneEdgeGram', + search_analyzer: 'peliasQuery' } }, },{ @@ -161,7 +168,8 @@ var schema = { match_mapping_type: 'string', mapping: { type: 'text', - analyzer: 'peliasPhrase' + analyzer: 'peliasPhrase', + search_analyzer: 'peliasQuery' } } },{ diff --git a/mappings/partial/admin.json b/mappings/partial/admin.json index 4907eb71..09307e06 100644 --- a/mappings/partial/admin.json +++ b/mappings/partial/admin.json @@ -1,10 +1,12 @@ { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } diff --git a/mappings/partial/postalcode.json b/mappings/partial/postalcode.json index e65f34f5..8f44b240 100644 --- a/mappings/partial/postalcode.json +++ b/mappings/partial/postalcode.json @@ -1,10 +1,12 @@ { "type": "text", "analyzer": "peliasZip", + "search_analyzer": "peliasZip", "fields": { "ngram": { "type": "text", - "analyzer": "peliasIndexOneEdgeGram" + "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasZip" } } } diff --git a/test/compile.js b/test/compile.js index bfd7b115..705c896f 100644 --- a/test/compile.js +++ b/test/compile.js @@ -1,8 +1,17 @@ +const _ = require('lodash'); const path = require('path'); const schema = require('../'); const fixture = require('./fixtures/expected.json'); const config = require('pelias-config').generate(); +const forEachDeep = (obj, cb) => + _.forEach(obj, (val, key) => { + cb(val, key); + if (_.isPlainObject(val) || _.isArray(val)){ + forEachDeep(val, cb); + } + }); + module.exports.tests = {}; module.exports.tests.compile = function(test, common) { @@ -13,11 +22,11 @@ module.exports.tests.compile = function(test, common) { }); }; -// admin indeces are explicitly specified in order to specify a custom +// admin indices are explicitly specified in order to specify a custom // dynamic_template and to avoid 'type not found' errors when deploying // the api codebase against an index without admin data -module.exports.tests.indeces = function(test, common) { - test('explicitly specify some admin indeces and their analyzer', function(t) { +module.exports.tests.indices = function(test, common) { + test('explicitly specify some admin indices and their analyzer', function(t) { const _type = config.schema.typeName; t.equal(typeof schema.mappings[_type], 'object', 'mappings present'); t.equal(schema.mappings[_type].dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexOneEdgeGram'); @@ -35,10 +44,59 @@ module.exports.tests.dynamic_templates = function(test, common) { t.equal(template.match_mapping_type, 'string'); t.deepEqual(template.mapping, { type: 'text', - analyzer: 'peliasIndexOneEdgeGram' + analyzer: 'peliasIndexOneEdgeGram', + search_analyzer: 'peliasQuery' }); t.end(); }); + test('dynamic_templates: phrase', function (t) { + const _type = config.schema.typeName; + t.equal(typeof schema.mappings[_type].dynamic_templates[1].phrase, 'object', 'phrase template specified'); + var template = schema.mappings[_type].dynamic_templates[1].phrase; + t.equal(template.path_match, 'phrase.*'); + t.equal(template.match_mapping_type, 'string'); + t.deepEqual(template.mapping, { + type: 'text', + analyzer: 'peliasPhrase', + search_analyzer: 'peliasQuery' + }); + t.end(); + }); + test('dynamic_templates: addendum', function (t) { + const _type = config.schema.typeName; + t.equal(typeof schema.mappings[_type].dynamic_templates[2].addendum, 'object', 'addendum template specified'); + var template = schema.mappings[_type].dynamic_templates[2].addendum; + t.equal(template.path_match, 'addendum.*'); + t.equal(template.match_mapping_type, 'string'); + t.deepEqual(template.mapping, { + type: 'keyword', + index: false, + doc_values: false + }); + t.end(); + }); +}; + +// ensure both "analyzer" and "search_analyzer" are set for stringy fields +module.exports.tests.analyzers = function (test, common) { + test('analyzers: ensure "analyzer" and "search_analyzer" are set', function (t) { + + const stringyTypes = ['string', 'text']; + const stringyFields = []; + + forEachDeep(schema, (value, key) => { + if (!_.isPlainObject(value)) { return; } + if (!stringyTypes.includes(_.get(value, 'type', ''))) { return; } + stringyFields.push({ key: key, value: value }); + }); + + stringyFields.forEach(field => { + t.true(_.has(field.value, 'analyzer'), `analyzer not set on ${field.key}`) + t.true(_.has(field.value, 'search_analyzer'), `search_analyzer not set on ${field.key}`) + }) + + t.end(); + }); }; // current schema (compiled) - requires schema to be copied and settings to @@ -69,8 +127,8 @@ module.exports.tests.current_schema = function(test, common) { // console.error( JSON.stringify( schemaCopy, null, 2 ) ); // code to write expected output to the fixture - //const fs = require('fs'); - //fs.writeFileSync(path.resolve( __dirname + '/fixtures/expected.json' ), JSON.stringify(schemaCopy, null, 2)); + // const fs = require('fs'); + // fs.writeFileSync(path.resolve( __dirname + '/fixtures/expected.json' ), JSON.stringify(schemaCopy, null, 2)); t.deepEqual(schemaCopy, fixture); t.end(); diff --git a/test/document.js b/test/document.js index d07f2ea7..5e5b1fc7 100644 --- a/test/document.js +++ b/test/document.js @@ -49,6 +49,7 @@ module.exports.tests.address_analysis = function(test, common) { test('name', function(t) { t.equal(prop.name.type, 'text'); t.equal(prop.name.analyzer, 'keyword'); + t.equal(prop.name.search_analyzer, 'keyword'); t.end(); }); @@ -56,6 +57,7 @@ module.exports.tests.address_analysis = function(test, common) { test('unit', function(t) { t.equal(prop.unit.type, 'text', 'unit has full text type'); t.equal(prop.unit.analyzer, 'peliasUnit', 'unit analyzer is peliasUnit'); + t.equal(prop.unit.search_analyzer, 'peliasUnit', 'unit search_analyzer is peliasUnit'); t.end(); }); @@ -63,6 +65,7 @@ module.exports.tests.address_analysis = function(test, common) { test('number', function(t) { t.equal(prop.number.type, 'text'); t.equal(prop.number.analyzer, 'peliasHousenumber'); + t.equal(prop.number.search_analyzer, 'peliasHousenumber'); t.end(); }); @@ -70,6 +73,7 @@ module.exports.tests.address_analysis = function(test, common) { test('street', function(t) { t.equal(prop.street.type, 'text'); t.equal(prop.street.analyzer, 'peliasStreet'); + t.equal(prop.street.search_analyzer, 'peliasStreet'); t.end(); }); @@ -79,6 +83,7 @@ module.exports.tests.address_analysis = function(test, common) { test('zip', function(t) { t.equal(prop.zip.type, 'text'); t.equal(prop.zip.analyzer, 'peliasZip'); + t.equal(prop.zip.search_analyzer, 'peliasZip'); t.end(); }); }; @@ -125,12 +130,14 @@ module.exports.tests.parent_analysis = function(test, common) { t.equal(prop[field].analyzer, 'peliasAdmin', `${field} analyzer is peliasAdmin`); t.equal(prop[field+'_a'].type, 'text', `${field}_a type is text`); t.equal(prop[field+'_a'].analyzer, 'peliasAdmin', `${field}_a analyzer is peliasAdmin`); + t.equal(prop[field+'_a'].search_analyzer, 'peliasAdmin', `${field}_a analyzer is peliasAdmin`); t.equal(prop[field+'_id'].type, 'keyword', `${field}_id type is keyword`); t.equal(prop[field+'_id'].index, undefined, `${field}_id index left at default`); // subfields t.equal(prop[field].fields.ngram.type, 'text', `${field}.ngram type is full text`); t.equal(prop[field].fields.ngram.analyzer, 'peliasIndexOneEdgeGram', `${field}.ngram analyzer is peliasIndexOneEdgeGram`); + t.equal(prop[field].fields.ngram.search_analyzer, 'peliasAdmin', `${field}.ngram analyzer is peliasIndexOneEdgeGram`); t.end(); }); @@ -139,8 +146,10 @@ module.exports.tests.parent_analysis = function(test, common) { test('postalcode', function(t) { t.equal(prop['postalcode'].type, 'text', 'postalcode is full text field'); t.equal(prop['postalcode'].analyzer, 'peliasZip', 'postalcode analyzer is peliasZip'); + t.equal(prop['postalcode'].search_analyzer, 'peliasZip', 'postalcode analyzer is peliasZip'); t.equal(prop['postalcode'+'_a'].type, 'text', 'postalcode_a is full text field'); t.equal(prop['postalcode'+'_a'].analyzer, 'peliasZip', 'postalcode_a analyzer is peliasZip'); + t.equal(prop['postalcode'+'_a'].search_analyzer, 'peliasZip', 'postalcode_a analyzer is peliasZip'); t.equal(prop['postalcode'+'_id'].type, 'keyword', 'postalcode_id field is keyword type'); t.equal(prop['postalcode'+'_id'].index, undefined, 'postalcode_id index left at default'); @@ -157,6 +166,7 @@ module.exports.tests.dynamic_templates = function(test, common) { t.equal(template.mapping.type, 'text', 'set to full text type'); t.equal(template.mapping.fielddata, undefined, 'fielddata is left to default (disabled)'); t.equal(template.mapping.analyzer, 'peliasIndexOneEdgeGram', 'analyzer set'); + t.equal(template.mapping.search_analyzer, 'peliasQuery', 'search_analyzer set'); t.end(); }); test('dynamic_templates: phrase', function(t) { @@ -167,6 +177,7 @@ module.exports.tests.dynamic_templates = function(test, common) { t.equal(template.mapping.type, 'text', 'set to full text type'); t.equal(template.mapping.fielddata, undefined, 'fielddata is left to default (disabled)'); t.equal(template.mapping.analyzer, 'peliasPhrase', 'analyzer set'); + t.equal(template.mapping.search_analyzer, 'peliasQuery', 'search_analyzer set'); t.end(); }); }; diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 31e1638c..822d94d3 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -594,27 +594,33 @@ "properties": { "name": { "type": "text", - "analyzer": "keyword" + "analyzer": "keyword", + "search_analyzer": "keyword" }, "unit": { "type": "text", - "analyzer": "peliasUnit" + "analyzer": "peliasUnit", + "search_analyzer": "peliasUnit" }, "number": { "type": "text", - "analyzer": "peliasHousenumber" + "analyzer": "peliasHousenumber", + "search_analyzer": "peliasHousenumber" }, "street": { "type": "text", - "analyzer": "peliasStreet" + "analyzer": "peliasStreet", + "search_analyzer": "peliasStreet" }, "cross_street": { "type": "text", - "analyzer": "peliasStreet" + "analyzer": "peliasStreet", + "search_analyzer": "peliasStreet" }, "zip": { "type": "text", - "analyzer": "peliasZip" + "analyzer": "peliasZip", + "search_analyzer": "peliasZip" } } }, @@ -625,10 +631,12 @@ "continent": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -636,10 +644,12 @@ "continent_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -651,10 +661,12 @@ "ocean": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -662,10 +674,12 @@ "ocean_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -677,10 +691,12 @@ "empire": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -688,10 +704,12 @@ "empire_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -703,10 +721,12 @@ "country": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -714,10 +734,12 @@ "country_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -729,10 +751,12 @@ "dependency": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -740,10 +764,12 @@ "dependency_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -755,10 +781,12 @@ "marinearea": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -766,10 +794,12 @@ "marinearea_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -781,10 +811,12 @@ "macroregion": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -792,10 +824,12 @@ "macroregion_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -807,10 +841,12 @@ "region": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -818,10 +854,12 @@ "region_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -833,10 +871,12 @@ "macrocounty": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -844,10 +884,12 @@ "macrocounty_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -859,10 +901,12 @@ "county": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -870,10 +914,12 @@ "county_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -885,10 +931,12 @@ "locality": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -896,10 +944,12 @@ "locality_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -911,10 +961,12 @@ "borough": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -922,10 +974,12 @@ "borough_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -937,10 +991,12 @@ "localadmin": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -948,10 +1004,12 @@ "localadmin_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -963,10 +1021,12 @@ "neighbourhood": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -974,10 +1034,12 @@ "neighbourhood_a": { "type": "text", "analyzer": "peliasAdmin", + "search_analyzer": "peliasAdmin", "fields": { "ngram": { "type": "text", "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasAdmin", "doc_values": false } } @@ -989,20 +1051,24 @@ "postalcode": { "type": "text", "analyzer": "peliasZip", + "search_analyzer": "peliasZip", "fields": { "ngram": { "type": "text", - "analyzer": "peliasIndexOneEdgeGram" + "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasZip" } } }, "postalcode_a": { "type": "text", "analyzer": "peliasZip", + "search_analyzer": "peliasZip", "fields": { "ngram": { "type": "text", - "analyzer": "peliasIndexOneEdgeGram" + "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasZip" } } }, @@ -1050,7 +1116,8 @@ "match_mapping_type": "string", "mapping": { "type": "text", - "analyzer": "peliasIndexOneEdgeGram" + "analyzer": "peliasIndexOneEdgeGram", + "search_analyzer": "peliasQuery" } } }, @@ -1060,7 +1127,8 @@ "match_mapping_type": "string", "mapping": { "type": "text", - "analyzer": "peliasPhrase" + "analyzer": "peliasPhrase", + "search_analyzer": "peliasQuery" } } }, From af0929eeb7357fbf5980f8135be9e39480c04771 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 13 Dec 2019 16:27:24 +0100 Subject: [PATCH 2/3] feat(normalizers): add optional normalizer for keyword fields --- .../source_layer_sourceid_filtering.js | 32 +++++++++---------- mappings/partial/keyword.json | 1 + mappings/partial/keyword_with_doc_values.json | 3 +- settings.js | 9 ++++++ test/compile.js | 23 +++++++++++++ test/fixtures/expected.json | 32 +++++++++++++++++-- test/settings.js | 16 ++++++++++ 7 files changed, 97 insertions(+), 19 deletions(-) diff --git a/integration/source_layer_sourceid_filtering.js b/integration/source_layer_sourceid_filtering.js index dbdbceae..a4be2b6e 100644 --- a/integration/source_layer_sourceid_filtering.js +++ b/integration/source_layer_sourceid_filtering.js @@ -57,6 +57,22 @@ module.exports.tests.source_filter = function(test, common){ }); }); + // case insensitive + suite.assert( function( done ){ + suite.client.search({ + index: suite.props.index, + type: config.schema.typeName, + body: { query: { + term: { + source: 'OSM' + } + }} + }, function( err, res ){ + t.equal( res.hits.total, 2 ); + done(); + }); + }); + // find all 'address' layers suite.assert( function( done ){ suite.client.search({ @@ -104,22 +120,6 @@ module.exports.tests.source_filter = function(test, common){ }); }); - // case sensitive - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { - term: { - source: 'OSM' - } - }} - }, function( err, res ){ - t.equal( res.hits.total, 0 ); - done(); - }); - }); - // keyword analysis - no partial matching suite.assert( function( done ){ suite.client.search({ diff --git a/mappings/partial/keyword.json b/mappings/partial/keyword.json index 5f325492..49f379cf 100644 --- a/mappings/partial/keyword.json +++ b/mappings/partial/keyword.json @@ -1,4 +1,5 @@ { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false } diff --git a/mappings/partial/keyword_with_doc_values.json b/mappings/partial/keyword_with_doc_values.json index 16a0e08c..edb254b9 100644 --- a/mappings/partial/keyword_with_doc_values.json +++ b/mappings/partial/keyword_with_doc_values.json @@ -1,3 +1,4 @@ { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" } diff --git a/settings.js b/settings.js index f55c41b9..ff510363 100644 --- a/settings.js +++ b/settings.js @@ -34,6 +34,15 @@ function generate(){ "pattern": "[\\s,/\\\\-]+" } }, + "normalizer": { + "peliasKeywordNormalizer": { + "type": "custom", + "filter": [ + "lowercase", + "icu_folding" + ] + } + }, "analyzer": { "peliasAdmin": { "type": "custom", diff --git a/test/compile.js b/test/compile.js index 705c896f..033e5170 100644 --- a/test/compile.js +++ b/test/compile.js @@ -99,6 +99,29 @@ module.exports.tests.analyzers = function (test, common) { }); }; +// note: this test is commented out for now because it's valid for some keyword +// fields such as bounding_box and addendum to use the null normalizer, but it's +// not easy to test because it's not possible to specify them as null in the mapping. + +// ensure "normalizer" is set for keyword fields +// module.exports.tests.normalizers = function (test, common) { +// test('normalizers: ensure "normalizer" is set', function (t) { +// const keywordFields = []; + +// forEachDeep(schema, (value, key) => { +// if (!_.isPlainObject(value)) { return; } +// if (_.get(value, 'type', '') !== 'keyword') { return; } +// keywordFields.push({ key: key, value: value }); +// }); + +// keywordFields.forEach(field => { +// t.true(_.has(field.value, 'normalizer'), `normalizer not set on ${field.key}`) +// }) + +// t.end(); +// }); +// }; + // current schema (compiled) - requires schema to be copied and settings to // be regenerated from a fixture in order to pass in CI environments. module.exports.tests.current_schema = function(test, common) { diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 822d94d3..e72c2d5b 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -11,6 +11,15 @@ "pattern": "[\\s,/\\\\-]+" } }, + "normalizer": { + "peliasKeywordNormalizer": { + "type": "custom", + "filter": [ + "lowercase", + "icu_folding" + ] + } + }, "analyzer": { "peliasAdmin": { "type": "custom", @@ -575,10 +584,12 @@ "doc": { "properties": { "source": { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" }, "layer": { - "type": "keyword" + "type": "keyword", + "normalizer": "peliasKeywordNormalizer" }, "name": { "type": "object", @@ -656,6 +667,7 @@ }, "continent_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "ocean": { @@ -686,6 +698,7 @@ }, "ocean_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "empire": { @@ -716,6 +729,7 @@ }, "empire_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "country": { @@ -746,6 +760,7 @@ }, "country_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "dependency": { @@ -776,6 +791,7 @@ }, "dependency_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "marinearea": { @@ -806,6 +822,7 @@ }, "marinearea_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "macroregion": { @@ -836,6 +853,7 @@ }, "macroregion_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "region": { @@ -866,6 +884,7 @@ }, "region_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "macrocounty": { @@ -896,6 +915,7 @@ }, "macrocounty_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "county": { @@ -926,6 +946,7 @@ }, "county_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "locality": { @@ -956,6 +977,7 @@ }, "locality_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "borough": { @@ -986,6 +1008,7 @@ }, "borough_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "localadmin": { @@ -1016,6 +1039,7 @@ }, "localadmin_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "neighbourhood": { @@ -1046,6 +1070,7 @@ }, "neighbourhood_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "postalcode": { @@ -1074,6 +1099,7 @@ }, "postalcode_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false } } @@ -1090,10 +1116,12 @@ }, "source_id": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "category": { "type": "keyword", + "normalizer": "peliasKeywordNormalizer", "doc_values": false }, "population": { diff --git a/test/settings.js b/test/settings.js index 360370af..19f65a26 100644 --- a/test/settings.js +++ b/test/settings.js @@ -48,6 +48,22 @@ module.exports.tests.analysis = function(test, common) { }); }; +// -- normalizers -- + +module.exports.tests.peliasKeywordNormalizer = function (test, common) { + test('has pelias keyword normalizer', function (t) { + var s = settings(); + t.equal(typeof s.analysis.normalizer.peliasKeywordNormalizer, 'object', 'there is a pelias keyword normalizer'); + var normalizer = s.analysis.normalizer.peliasKeywordNormalizer; + t.equal(normalizer.type, 'custom', 'custom normalizer'); + t.deepEqual(normalizer.filter, [ + "lowercase", + "icu_folding" + ]); + t.end(); + }); +}; + // -- analyzers -- module.exports.tests.peliasAdminAnalyzer = function(test, common) { From cec0b194f9303393ca1968c7c6cbc76bff603f57 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 13 Dec 2019 16:48:59 +0100 Subject: [PATCH 3/3] feat(normalizers): add "trim" to "peliasKeywordNormalizer" filters --- settings.js | 3 ++- test/fixtures/expected.json | 5 +++-- test/settings.js | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/settings.js b/settings.js index ff510363..c1798912 100644 --- a/settings.js +++ b/settings.js @@ -39,7 +39,8 @@ function generate(){ "type": "custom", "filter": [ "lowercase", - "icu_folding" + "icu_folding", + "trim" ] } }, diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index e72c2d5b..ff2c8375 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -16,7 +16,8 @@ "type": "custom", "filter": [ "lowercase", - "icu_folding" + "icu_folding", + "trim" ] } }, @@ -1184,4 +1185,4 @@ "dynamic": "strict" } } -} \ No newline at end of file +} diff --git a/test/settings.js b/test/settings.js index 19f65a26..2bdcf351 100644 --- a/test/settings.js +++ b/test/settings.js @@ -58,7 +58,8 @@ module.exports.tests.peliasKeywordNormalizer = function (test, common) { t.equal(normalizer.type, 'custom', 'custom normalizer'); t.deepEqual(normalizer.filter, [ "lowercase", - "icu_folding" + "icu_folding", + "trim" ]); t.end(); });