pelias · SiarheiFedartsou · Mar 1, 2025 · Mar 1, 2025 · Mar 1, 2025 · Mar 1, 2025
diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml
@@ -23,7 +23,7 @@ jobs:
     - name: Run integration tests
       run: |
         if [ "${{ matrix.icuTokenizer }}" = "true" ]; then
-          jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json
+          jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json
           export PELIAS_CONFIG=$(pwd)/config-icu.json
         fi
         npm install

diff --git a/.github/workflows/_unit_tests.yml b/.github/workflows/_unit_tests.yml
@@ -19,7 +19,7 @@ jobs:
       run: |
         npm install
         if [ "${{ matrix.icuTokenizer }}" = "true" ]; then
-          jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json
+          jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json
           export PELIAS_CONFIG=$(pwd)/config-icu.json
         fi
         npm run test
diff --git a/configValidation.js b/configValidation.js
@@ -2,14 +2,16 @@ const Joi = require('@hapi/joi');
 
 // Schema Configuration
 // schema.indexName: populated by defaults if not overridden
-// schema.icuTokenizer: boolean, optional, defaults to false
 // esclient: object, validation performed by elasticsearch module
+// featureFlags.icuTokenizer: boolean, optional, defaults to false (the featureFlags section itself is optional)
 const schema = Joi.object().required().keys({
   schema: Joi.object().required().keys({
-    indexName: Joi.string().required(),
-    icuTokenizer: Joi.boolean().optional()
+    indexName: Joi.string().required()
   }),
-  esclient: Joi.object().required()
+  esclient: Joi.object().required(),
+  featureFlags: Joi.object().optional().keys({
+    icuTokenizer: Joi.boolean().optional()
+  }).unknown(true) // flags not declared here are accepted rather than rejected
 }).unknown(true);
 
 module.exports = {

diff --git a/integration/analyzer_peliasIndexOneEdgeGram.js b/integration/analyzer_peliasIndexOneEdgeGram.js
@@ -85,7 +85,7 @@ module.exports.tests.analyze = function(test, common){
 
     assertAnalysis( 'british_american_english', 'town theatre', ['0:town', '1:theatre', '1:theater'] );
     assertAnalysis( 'british_american_english', 'town theater', ['0:town', '1:theater', '1:theatre'] );
-    if (config.schema.icuTokenizer) {
+    if (config.featureFlags?.icuTokenizer) {
       assertAnalysis('thai_address', 'ซอยเพชรบุรี๑foo', [
         '0:ซ', '0:ซอ', '0:ซอย',
         '1:เพชรบุรี1', '1:เพชรบุรี', '1:เพชรบุร', '1:เพชรบุ', '1:เพชรบ', '1:เพชร', '1:เพช', '1:เพ', '1:เ',

diff --git a/integration/analyzer_peliasQuery.js b/integration/analyzer_peliasQuery.js
@@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){
     assertAnalysis('thai_digits', '๐๑๒๓๔๕๖๗ ๘๙', ['01234567', '89']); // leading zero remains
     assertAnalysis('thai_digits', '๑๒๓๔๕๖๗๐ ๘๙', ['12345670', '89']);
     assertAnalysis('digit_glued_to_word', 'john doe42', ['john', 'doe42']);
-    if (config.schema.icuTokenizer) {
+    if (config.featureFlags?.icuTokenizer) {
       assertAnalysis('thai_tonemarks', 'ก่ก้ก๊ก๋ข่ข้ข๊ข๋ค่ค้ค๊ค๋ฆ่ฆ้ฆ๊ฆ๋', ['กก', 'กก', 'ขขขขคคคคฆฆฆฆ']);
       assertAnalysis('chinese_address', '北京市朝阳区东三环中路1号国际大厦A座1001室', ['北京市', '朝阳', '区', '东', '三', '环', '中路', '1', '号', '国际', '大厦', 'a', '座', '1001', '室']);  
     } else {
@@ -68,7 +68,7 @@ module.exports.tests.functional = function(test, common){
     assertAnalysis( 'address', '101 mapzen place', [ '101', 'mapzen', 'place' ]);
 
      // complicated tokenization for some Asian languages
-    if (config.schema.icuTokenizer) {
+    if (config.featureFlags?.icuTokenizer) {
       assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] );
       assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] );
       assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]);

diff --git a/integration/analyzer_peliasStreet.js b/integration/analyzer_peliasStreet.js
@@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){
     assertAnalysis( 'remove_ordinals', 'Ast th 101st', ['ast','th','101'] );
 
     // complicated tokenization for some Asian languages
-    if (config.schema.icuTokenizer) {
+    if (config.featureFlags?.icuTokenizer) {
       assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] );
       assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] );
       assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]);

diff --git a/settings-icu.js b/settings-icu.js
@@ -4,7 +4,7 @@ const _ = require('lodash');
  * This module contains modifications to the Pelias schema to adopt the elastic ICU tokenizer.
  * This tokenizer improves word-splitting of non-latin alphabets (particularly Asian languages).
  * 
- * It can be enabled by setting `config.schema.icuTokenizer` in your `pelias.json` config.
+ * It can be enabled by setting `config.featureFlags.icuTokenizer` in your `pelias.json` config.
  * Note: this must be set *before* you create your elasticsearch index or it will have no effect.
  * 
  * This feature is considered beta, we encourage testing & feedback from the community in order 

diff --git a/settings.js b/settings.js
@@ -293,7 +293,7 @@ function generate(){
   });
 
   // Experimental ICU tokenizer
-  if (config.schema.icuTokenizer) {
+  if (config.featureFlags?.icuTokenizer) {
     settings = settingsICU(settings);
   }
 

diff --git a/test/configValidation.js b/test/configValidation.js
@@ -5,8 +5,31 @@ const configValidation = require('../configValidation');
 module.exports.tests = {};
 
 module.exports.tests.interface = function(test, common) {
+
+  test('does not throw on unknown feature flags', function(t) {
+    const config = {
+      schema: {
+        indexName: "pelias"
+      },
+      esclient: {},
+      featureFlags: {
+        unknown_feature_flag: { // object-valued flag
+          nested: true
+        },
+        unknown_feature_flag2: true // boolean-valued flag
+      }
+    };
+    // both flags above are arbitrary names; validation is expected to let them through
+    t.doesNotThrow(function () {
+      configValidation.validate(config);
+    });
+    t.end();
+
+  });
+
+
   test('config without schema should throw error', function(t) {
-    var config = {
+    const config = {
       esclient: {}
     };
 
@@ -18,7 +41,7 @@ module.exports.tests.interface = function(test, common) {
   });
 
   test('config without schema.indexName should throw error', function(t) {
-    var config = {
+    const config = {
       schema: {},
       esclient: {}
     };
@@ -32,7 +55,7 @@ module.exports.tests.interface = function(test, common) {
 
   test('config with non-string schema.indexName should throw error', function(t) {
     [null, 17, {}, [], false].forEach((value) => {
-      var config = {
+      const config = {
         schema: {
           indexName: value,
         },
@@ -51,7 +74,7 @@ module.exports.tests.interface = function(test, common) {
 
   test('config with non-object esclient should throw error', function(t) {
     [null, 17, [], 'string', true].forEach((value) => {
-      var config = {
+      const config = {
         schema: {
           indexName: 'example_index',
         },
@@ -69,7 +92,7 @@ module.exports.tests.interface = function(test, common) {
   });
 
   test('config with string schema.indexName and object esclient should not throw error', function(t) {
-    var config = {
+    const config = {
       schema: {
         indexName: 'example_index',
       },
@@ -92,7 +115,7 @@ module.exports.all = function (tape, common) {
     return tape('configValidation: ' + name, testFunction);
   }
 
-  for( var testCase in module.exports.tests ){
+  for( const testCase in module.exports.tests ){
     module.exports.tests[testCase](test, common);
   }
 };
diff --git a/test/fixtures/config-icu-tokenizer.json b/test/fixtures/config-icu-tokenizer.json
@@ -8,8 +8,7 @@
         }
       }
     },
-    "schema": {
+    "featureFlags": {
         "icuTokenizer": true
     }
 }
-
diff --git a/test/settings.js b/test/settings.js
@@ -50,14 +50,14 @@ module.exports.tests.analysis = function(test, common) {
 };
 
 function mayBeAmpersandMapper() {
-  if (config.schema.icuTokenizer) {
+  if (config.featureFlags?.icuTokenizer) { // ?. tolerates a config with no featureFlags section
     return ['ampersand_mapper'];
   }
   return [];
 }
 
 function mayBeAmpersandReplacer() {
-  if (config.schema.icuTokenizer) {
+  if (config.featureFlags?.icuTokenizer) { // ?. tolerates a config with no featureFlags section
     return ['ampersand_replacer'];
   }
   return [];