diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml index ee163d8e..1e594c90 100644 --- a/.github/workflows/_integration_tests.yml +++ b/.github/workflows/_integration_tests.yml @@ -23,7 +23,7 @@ jobs: - name: Run integration tests run: | if [ "${{ matrix.icuTokenizer }}" = "true" ]; then - jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json + jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json export PELIAS_CONFIG=$(pwd)/config-icu.json fi npm install diff --git a/.github/workflows/_unit_tests.yml b/.github/workflows/_unit_tests.yml index d0ef57f0..2b3081f2 100644 --- a/.github/workflows/_unit_tests.yml +++ b/.github/workflows/_unit_tests.yml @@ -19,7 +19,7 @@ jobs: run: | npm install if [ "${{ matrix.icuTokenizer }}" = "true" ]; then - jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json + jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json export PELIAS_CONFIG=$(pwd)/config-icu.json fi npm run test \ No newline at end of file diff --git a/configValidation.js b/configValidation.js index 02d462ed..9699c9c8 100644 --- a/configValidation.js +++ b/configValidation.js @@ -2,14 +2,16 @@ const Joi = require('@hapi/joi'); // Schema Configuration // schema.indexName: populated by defaults if not overridden -// schema.icuTokenizer: boolean, optional, defaults to false // esclient: object, validation performed by elasticsearch module +// featureFlags.icuTokenizer: boolean, optional, defaults to false const schema = Joi.object().required().keys({ schema: Joi.object().required().keys({ - indexName: Joi.string().required(), - icuTokenizer: Joi.boolean().optional() + indexName: Joi.string().required() }), - esclient: Joi.object().required() + esclient: Joi.object().required(), + featureFlags: Joi.object().optional().keys({ + icuTokenizer: Joi.boolean().optional() + }).unknown(true) }).unknown(true); module.exports = { diff --git a/integration/analyzer_peliasIndexOneEdgeGram.js b/integration/analyzer_peliasIndexOneEdgeGram.js index 523c092f..17438ccc 100644 --- a/integration/analyzer_peliasIndexOneEdgeGram.js +++ b/integration/analyzer_peliasIndexOneEdgeGram.js @@ -85,7 +85,7 @@ module.exports.tests.analyze = function(test, common){ assertAnalysis( 'british_american_english', 'town theatre', ['0:town', '1:theatre', '1:theater'] ); assertAnalysis( 'british_american_english', 'town theater', ['0:town', '1:theater', '1:theatre'] ); - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { assertAnalysis('thai_address', 'ซอยเพชรบุรี๑foo', [ '0:ซ', '0:ซอ', '0:ซอย', '1:เพชรบุรี1', '1:เพชรบุรี', '1:เพชรบุร', '1:เพชรบุ', '1:เพชรบ', '1:เพชร', '1:เพช', '1:เพ', '1:เ', diff --git a/integration/analyzer_peliasQuery.js b/integration/analyzer_peliasQuery.js index fc5a579d..bee117fa 100644 --- a/integration/analyzer_peliasQuery.js +++ b/integration/analyzer_peliasQuery.js @@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){ assertAnalysis('thai_digits', '๐๑๒๓๔๕๖๗ ๘๙', ['01234567', '89']); // leading zero remains assertAnalysis('thai_digits', '๑๒๓๔๕๖๗๐ ๘๙', ['12345670', '89']); assertAnalysis('digit_glued_to_word', 'john doe42', ['john', 'doe42']); - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { assertAnalysis('thai_tonemarks', 'ก่ก้ก๊ก๋ข่ข้ข๊ข๋ค่ค้ค๊ค๋ฆ่ฆ้ฆ๊ฆ๋', ['กก', 'กก', 'ขขขขคคคคฆฆฆฆ']); assertAnalysis('chinese_address', '北京市朝阳区东三环中路1号国际大厦A座1001室', ['北京市', '朝阳', '区', '东', '三', '环', '中路', '1', '号', '国际', '大厦', 'a', '座', '1001', '室']); } else { @@ -68,7 +68,7 @@ module.exports.tests.functional = function(test, common){ assertAnalysis( 'address', '101 mapzen place', [ '101', 'mapzen', 'place' ]); // complicated tokenization for some Asian languages - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] ); assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] ); assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]); diff --git a/integration/analyzer_peliasStreet.js b/integration/analyzer_peliasStreet.js index 7f861f9a..d2960574 100644 --- a/integration/analyzer_peliasStreet.js +++ b/integration/analyzer_peliasStreet.js @@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){ assertAnalysis( 'remove_ordinals', 'Ast th 101st', ['ast','th','101'] ); // complicated tokenization for some Asian languages - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] ); assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] ); assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]); diff --git a/settings-icu.js b/settings-icu.js index 2ebfa2c0..081eb3ba 100644 --- a/settings-icu.js +++ b/settings-icu.js @@ -4,7 +4,7 @@ const _ = require('lodash'); * This module contains modifications to the Pelias schema to adopt the elastic ICU tokenizer. * This tokenizer improves word-splitting of non-latin alphabets (particularly Asian languages). * - * It can be enabled by setting `config.schema.icuTokenizer` in your `pelias.json` config. + * It can be enabled by setting `config.featureFlags.icuTokenizer` in your `pelias.json` config. * Note: this must be set *before* you create your elasticsearch index or it will have no effect. * * This feature is considered beta, we encourage testing & feedback from the community in order diff --git a/settings.js b/settings.js index be3ad673..03de559b 100644 --- a/settings.js +++ b/settings.js @@ -293,7 +293,7 @@ function generate(){ }); // Experimental ICU tokenizer - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { settings = settingsICU(settings); } diff --git a/test/configValidation.js b/test/configValidation.js index f5469b6a..f65cbd34 100644 --- a/test/configValidation.js +++ b/test/configValidation.js @@ -5,8 +5,31 @@ const configValidation = require('../configValidation'); module.exports.tests = {}; module.exports.tests.interface = function(test, common) { + + test('does not throw on unknown feature flags', function(t) { + const config = { + schema: { + indexName: "pelias" + }, + esclient: {}, + featureFlags: { + unknown_feature_flag: { + nested: true + }, + unknown_feature_flag2: true + } + }; + + t.doesNotThrow(function () { + configValidation.validate(config); + }); + t.end(); + + }); + + test('config without schema should throw error', function(t) { - var config = { + const config = { esclient: {} }; @@ -18,7 +41,7 @@ module.exports.tests.interface = function(test, common) { }); test('config without schema.indexName should throw error', function(t) { - var config = { + const config = { schema: {}, esclient: {} }; @@ -32,7 +55,7 @@ module.exports.tests.interface = function(test, common) { test('config with non-string schema.indexName should throw error', function(t) { [null, 17, {}, [], false].forEach((value) => { - var config = { + const config = { schema: { indexName: value, }, @@ -51,7 +74,7 @@ module.exports.tests.interface = function(test, common) { test('config with non-object esclient should throw error', function(t) { [null, 17, [], 'string', true].forEach((value) => { - var config = { + const config = { schema: { indexName: 'example_index', }, @@ -69,7 +92,7 @@ module.exports.tests.interface = function(test, common) { }); test('config with string schema.indexName and object esclient should not throw error', function(t) { - var config = { + const config = { schema: { indexName: 'example_index', }, @@ -92,7 +115,7 @@ module.exports.all = function (tape, common) { return tape('configValidation: ' + name, testFunction); } - for( var testCase in module.exports.tests ){ + for( const testCase in module.exports.tests ){ module.exports.tests[testCase](test, common); } }; diff --git a/test/fixtures/config-icu-tokenizer.json b/test/fixtures/config-icu-tokenizer.json index 81e70ce7..4112b0e8 100644 --- a/test/fixtures/config-icu-tokenizer.json +++ b/test/fixtures/config-icu-tokenizer.json @@ -8,8 +8,7 @@ } } }, - "schema": { + "featureFlags": { "icuTokenizer": true } } - \ No newline at end of file diff --git a/test/settings.js b/test/settings.js index 37789dfd..b42d8e64 100644 --- a/test/settings.js +++ b/test/settings.js @@ -50,14 +50,14 @@ module.exports.tests.analysis = function(test, common) { }; function mayBeAmpersandMapper() { - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { return ['ampersand_mapper']; } return []; } function mayBeAmpersandReplacer() { - if (config.schema.icuTokenizer) { + if (config.featureFlags?.icuTokenizer) { return ['ampersand_replacer']; } return [];