Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- name: Run integration tests
run: |
if [ "${{ matrix.icuTokenizer }}" = "true" ]; then
jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json
jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json
export PELIAS_CONFIG=$(pwd)/config-icu.json
fi
npm install
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
run: |
npm install
if [ "${{ matrix.icuTokenizer }}" = "true" ]; then
jq -n '{ schema: { icuTokenizer: true } }' > $(pwd)/config-icu.json
jq -n '{ featureFlags: { icuTokenizer: true } }' > $(pwd)/config-icu.json
export PELIAS_CONFIG=$(pwd)/config-icu.json
fi
npm run test
10 changes: 6 additions & 4 deletions configValidation.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ const Joi = require('@hapi/joi');

// Schema Configuration
// schema.indexName: populated by defaults if not overridden
// schema.icuTokenizer: boolean, optional, defaults to false
// esclient: object, validation performed by elasticsearch module
// featureFlags.icuTokenizer: boolean, optional, defaults to false
const schema = Joi.object().required().keys({
schema: Joi.object().required().keys({
indexName: Joi.string().required(),
icuTokenizer: Joi.boolean().optional()
indexName: Joi.string().required()
}),
esclient: Joi.object().required()
esclient: Joi.object().required(),
featureFlags: Joi.object().optional().keys({
icuTokenizer: Joi.boolean().optional()
}).unknown(true)
}).unknown(true);

module.exports = {
Expand Down
2 changes: 1 addition & 1 deletion integration/analyzer_peliasIndexOneEdgeGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ module.exports.tests.analyze = function(test, common){

assertAnalysis( 'british_american_english', 'town theatre', ['0:town', '1:theatre', '1:theater'] );
assertAnalysis( 'british_american_english', 'town theater', ['0:town', '1:theater', '1:theatre'] );
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
assertAnalysis('thai_address', 'ซอยเพชรบุรี๑foo', [
'0:ซ', '0:ซอ', '0:ซอย',
'1:เพชรบุรี1', '1:เพชรบุรี', '1:เพชรบุร', '1:เพชรบุ', '1:เพชรบ', '1:เพชร', '1:เพช', '1:เพ', '1:เ',
Expand Down
4 changes: 2 additions & 2 deletions integration/analyzer_peliasQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis('thai_digits', '๐๑๒๓๔๕๖๗ ๘๙', ['01234567', '89']); // leading zero remains
assertAnalysis('thai_digits', '๑๒๓๔๕๖๗๐ ๘๙', ['12345670', '89']);
assertAnalysis('digit_glued_to_word', 'john doe42', ['john', 'doe42']);
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
assertAnalysis('thai_tonemarks', 'ก่ก้ก๊ก๋ข่ข้ข๊ข๋ค่ค้ค๊ค๋ฆ่ฆ้ฆ๊ฆ๋', ['กก', 'กก', 'ขขขขคคคคฆฆฆฆ']);
assertAnalysis('chinese_address', '北京市朝阳区东三环中路1号国际大厦A座1001室', ['北京市', '朝阳', '区', '东', '三', '环', '中路', '1', '号', '国际', '大厦', 'a', '座', '1001', '室']);
} else {
Expand Down Expand Up @@ -68,7 +68,7 @@ module.exports.tests.functional = function(test, common){
assertAnalysis( 'address', '101 mapzen place', [ '101', 'mapzen', 'place' ]);

// complicated tokenization for some Asian languages
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] );
assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] );
assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]);
Expand Down
2 changes: 1 addition & 1 deletion integration/analyzer_peliasStreet.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module.exports.tests.analyze = function(test, common){
assertAnalysis( 'remove_ordinals', 'Ast th 101st', ['ast','th','101'] );

// complicated tokenization for some Asian languages
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
assertAnalysis('thai_address1', 'ซอยเพชรบุรี๑', ['ซอย', 'เพชรบุรี1'] );
assertAnalysis('thai_address2', 'ซอยเพชรบุรี๑foo', ['ซอย', 'เพชรบุรี1', 'foo'] );
assertAnalysis('thai_address3', 'บ้านเลขที่๑๒๓ถนนสุขุมวิทแขวงคลองตันเหนือเขตวัฒนา กรุงเทพมหานคร๑๐๑๑๐', ["บาน", "เลข", "ที123ถนน", "สุขุมวิท", "แขวง", "คลองตัน", "เหนือ", "เขต", "วัฒนา", "กรุงเทพมหานคร10110"]);
Expand Down
2 changes: 1 addition & 1 deletion settings-icu.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const _ = require('lodash');
* This module contains modifications to the Pelias schema to adopt the elastic ICU tokenizer.
* This tokenizer improves word-splitting of non-latin alphabets (particularly Asian languages).
*
* It can be enabled by setting `config.schema.icuTokenizer` in your `pelias.json` config.
* It can be enabled by setting `config.featureFlags.icuTokenizer` in your `pelias.json` config.
* Note: this must be set *before* you create your elasticsearch index or it will have no effect.
*
* This feature is considered beta, we encourage testing & feedback from the community in order
Expand Down
2 changes: 1 addition & 1 deletion settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ function generate(){
});

// Experimental ICU tokenizer
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
settings = settingsICU(settings);
}

Expand Down
35 changes: 29 additions & 6 deletions test/configValidation.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,31 @@ const configValidation = require('../configValidation');
module.exports.tests = {};

module.exports.tests.interface = function(test, common) {

// Extra keys under `featureFlags` must be accepted by the validator: the
// config below carries one flag holding a nested object and one holding a
// plain boolean, and validation is expected to complete without throwing.
test('does not throw on unknown feature flags', (t) => {
  const featureFlags = {
    unknown_feature_flag: { nested: true },
    unknown_feature_flag2: true
  };
  const config = {
    schema: { indexName: "pelias" },
    esclient: {},
    featureFlags
  };

  const runValidation = () => {
    configValidation.validate(config);
  };

  t.doesNotThrow(runValidation);
  t.end();
});


test('config without schema should throw error', function(t) {
var config = {
const config = {
esclient: {}
};

Expand All @@ -18,7 +41,7 @@ module.exports.tests.interface = function(test, common) {
});

test('config without schema.indexName should throw error', function(t) {
var config = {
const config = {
schema: {},
esclient: {}
};
Expand All @@ -32,7 +55,7 @@ module.exports.tests.interface = function(test, common) {

test('config with non-string schema.indexName should throw error', function(t) {
[null, 17, {}, [], false].forEach((value) => {
var config = {
const config = {
schema: {
indexName: value,
},
Expand All @@ -51,7 +74,7 @@ module.exports.tests.interface = function(test, common) {

test('config with non-object esclient should throw error', function(t) {
[null, 17, [], 'string', true].forEach((value) => {
var config = {
const config = {
schema: {
indexName: 'example_index',
},
Expand All @@ -69,7 +92,7 @@ module.exports.tests.interface = function(test, common) {
});

test('config with string schema.indexName and object esclient should not throw error', function(t) {
var config = {
const config = {
schema: {
indexName: 'example_index',
},
Expand All @@ -92,7 +115,7 @@ module.exports.all = function (tape, common) {
return tape('configValidation: ' + name, testFunction);
}

for( var testCase in module.exports.tests ){
for( const testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};
3 changes: 1 addition & 2 deletions test/fixtures/config-icu-tokenizer.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
}
}
},
"schema": {
"featureFlags": {
"icuTokenizer": true
}
}

4 changes: 2 additions & 2 deletions test/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ module.exports.tests.analysis = function(test, common) {
};

function mayBeAmpersandMapper() {
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
return ['ampersand_mapper'];
}
return [];
}

function mayBeAmpersandReplacer() {
if (config.schema.icuTokenizer) {
if (config.featureFlags?.icuTokenizer) {
return ['ampersand_replacer'];
}
return [];
Expand Down