Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add 'trim' to the 'peliasKeywordNormalizer' filters #416

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions integration/source_layer_sourceid_filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,22 @@ module.exports.tests.source_filter = function(test, common){
});
});

// case insensitive
suite.assert( function( done ){
suite.client.search({
index: suite.props.index,
type: config.schema.typeName,
body: { query: {
term: {
source: 'OSM'
}
}}
}, function( err, res ){
t.equal( res.hits.total, 2 );
done();
});
});

// find all 'address' layers
suite.assert( function( done ){
suite.client.search({
Expand Down Expand Up @@ -104,22 +120,6 @@ module.exports.tests.source_filter = function(test, common){
});
});

// case sensitive
suite.assert( function( done ){
suite.client.search({
index: suite.props.index,
type: config.schema.typeName,
body: { query: {
term: {
source: 'OSM'
}
}}
}, function( err, res ){
t.equal( res.hits.total, 0 );
done();
});
});

// keyword analysis - no partial matching
suite.assert( function( done ){
suite.client.search({
Expand Down
12 changes: 10 additions & 2 deletions mappings/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,32 @@ var schema = {
name: {
type: 'text',
analyzer: 'keyword',
search_analyzer: 'keyword'
},
unit: {
type: 'text',
analyzer: 'peliasUnit',
search_analyzer: 'peliasUnit'
},
number: {
type: 'text',
analyzer: 'peliasHousenumber',
search_analyzer: 'peliasHousenumber'
},
street: {
type: 'text',
analyzer: 'peliasStreet',
search_analyzer: 'peliasStreet'
},
cross_street: {
type: 'text',
analyzer: 'peliasStreet',
search_analyzer: 'peliasStreet'
},
zip: {
type: 'text',
analyzer: 'peliasZip',
search_analyzer: 'peliasZip'
},
}
},
Expand Down Expand Up @@ -152,7 +158,8 @@ var schema = {
match_mapping_type: 'string',
mapping: {
type: 'text',
analyzer: 'peliasIndexOneEdgeGram'
analyzer: 'peliasIndexOneEdgeGram',
search_analyzer: 'peliasQuery'
}
},
},{
Expand All @@ -161,7 +168,8 @@ var schema = {
match_mapping_type: 'string',
mapping: {
type: 'text',
analyzer: 'peliasPhrase'
analyzer: 'peliasPhrase',
search_analyzer: 'peliasQuery'
}
}
},{
Expand Down
2 changes: 2 additions & 0 deletions mappings/partial/admin.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{
"type": "text",
"analyzer": "peliasAdmin",
"search_analyzer": "peliasAdmin",
"fields": {
"ngram": {
"type": "text",
"analyzer": "peliasIndexOneEdgeGram",
"search_analyzer": "peliasAdmin",
"doc_values": false
}
}
Expand Down
1 change: 1 addition & 0 deletions mappings/partial/keyword.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"type": "keyword",
"normalizer": "peliasKeywordNormalizer",
"doc_values": false
}
3 changes: 2 additions & 1 deletion mappings/partial/keyword_with_doc_values.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"type": "keyword"
"type": "keyword",
"normalizer": "peliasKeywordNormalizer"
}
4 changes: 3 additions & 1 deletion mappings/partial/postalcode.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{
"type": "text",
"analyzer": "peliasZip",
"search_analyzer": "peliasZip",
"fields": {
"ngram": {
"type": "text",
"analyzer": "peliasIndexOneEdgeGram"
"analyzer": "peliasIndexOneEdgeGram",
"search_analyzer": "peliasZip"
}
}
}
10 changes: 10 additions & 0 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ function generate(){
"pattern": "[\\s,/\\\\-]+"
}
},
"normalizer": {
"peliasKeywordNormalizer": {
"type": "custom",
"filter": [
"lowercase",
"icu_folding",
"trim"
]
}
},
"analyzer": {
"peliasAdmin": {
"type": "custom",
Expand Down
93 changes: 87 additions & 6 deletions test/compile.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
const _ = require('lodash');
const path = require('path');
const schema = require('../');
const fixture = require('./fixtures/expected.json');
const config = require('pelias-config').generate();

// Depth-first walk over `obj`: `cb(value, key)` is invoked for every entry,
// parents before children, descending into nested plain objects and arrays.
// Returns whatever `_.forEach` returns for the top-level collection.
const forEachDeep = (obj, cb) => {
  const visit = (child, name) => {
    cb(child, name);
    const isContainer = _.isPlainObject(child) || _.isArray(child);
    if (isContainer) {
      forEachDeep(child, cb);
    }
  };
  return _.forEach(obj, visit);
};

module.exports.tests = {};

module.exports.tests.compile = function(test, common) {
Expand All @@ -13,11 +22,11 @@ module.exports.tests.compile = function(test, common) {
});
};

// admin indeces are explicitly specified in order to specify a custom
// admin indices are explicitly specified in order to specify a custom
// dynamic_template and to avoid 'type not found' errors when deploying
// the api codebase against an index without admin data
module.exports.tests.indeces = function(test, common) {
test('explicitly specify some admin indeces and their analyzer', function(t) {
module.exports.tests.indices = function(test, common) {
test('explicitly specify some admin indices and their analyzer', function(t) {
const _type = config.schema.typeName;
t.equal(typeof schema.mappings[_type], 'object', 'mappings present');
t.equal(schema.mappings[_type].dynamic_templates[0].nameGram.mapping.analyzer, 'peliasIndexOneEdgeGram');
Expand All @@ -35,12 +44,84 @@ module.exports.tests.dynamic_templates = function(test, common) {
t.equal(template.match_mapping_type, 'string');
t.deepEqual(template.mapping, {
type: 'text',
analyzer: 'peliasIndexOneEdgeGram'
analyzer: 'peliasIndexOneEdgeGram',
search_analyzer: 'peliasQuery'
});
t.end();
});
test('dynamic_templates: phrase', function (t) {
const _type = config.schema.typeName;
t.equal(typeof schema.mappings[_type].dynamic_templates[1].phrase, 'object', 'phrase template specified');
var template = schema.mappings[_type].dynamic_templates[1].phrase;
t.equal(template.path_match, 'phrase.*');
t.equal(template.match_mapping_type, 'string');
t.deepEqual(template.mapping, {
type: 'text',
analyzer: 'peliasPhrase',
search_analyzer: 'peliasQuery'
});
t.end();
});
test('dynamic_templates: addendum', function (t) {
const _type = config.schema.typeName;
t.equal(typeof schema.mappings[_type].dynamic_templates[2].addendum, 'object', 'addendum template specified');
var template = schema.mappings[_type].dynamic_templates[2].addendum;
t.equal(template.path_match, 'addendum.*');
t.equal(template.match_mapping_type, 'string');
t.deepEqual(template.mapping, {
type: 'keyword',
index: false,
doc_values: false
});
t.end();
});
};

// every text-like field in the compiled schema must declare both an
// index-time "analyzer" and a query-time "search_analyzer"
module.exports.tests.analyzers = function (test, common) {
  test('analyzers: ensure "analyzer" and "search_analyzer" are set', function (t) {

    // mapping "type" values which are treated as analyzed text
    const stringyTypes = ['string', 'text'];

    // walk the whole schema, collecting each object node declaring a stringy type
    const collected = [];
    forEachDeep(schema, (node, name) => {
      const isStringy = _.isPlainObject(node) &&
        stringyTypes.includes(_.get(node, 'type', ''));
      if (isStringy) {
        collected.push({ key: name, value: node });
      }
    });

    // assert both analyzer properties exist on each collected node
    for (const { key, value } of collected) {
      t.true(_.has(value, 'analyzer'), `analyzer not set on ${key}`);
      t.true(_.has(value, 'search_analyzer'), `search_analyzer not set on ${key}`);
    }

    t.end();
  });
};

// note: this test is commented out for now because it's valid for some keyword
// fields such as bounding_box and addendum to use the null normalizer, but it's
// not easy to test because it's not possible to specify them as null in the mapping.

// ensure "normalizer" is set for keyword fields
// module.exports.tests.normalizers = function (test, common) {
// test('normalizers: ensure "normalizer" is set', function (t) {
// const keywordFields = [];

// forEachDeep(schema, (value, key) => {
// if (!_.isPlainObject(value)) { return; }
// if (_.get(value, 'type', '') !== 'keyword') { return; }
// keywordFields.push({ key: key, value: value });
// });

// keywordFields.forEach(field => {
// t.true(_.has(field.value, 'normalizer'), `normalizer not set on ${field.key}`)
// })

// t.end();
// });
// };

// current schema (compiled) - requires schema to be copied and settings to
// be regenerated from a fixture in order to pass in CI environments.
module.exports.tests.current_schema = function(test, common) {
Expand Down Expand Up @@ -69,8 +150,8 @@ module.exports.tests.current_schema = function(test, common) {
// console.error( JSON.stringify( schemaCopy, null, 2 ) );

// code to write expected output to the fixture
//const fs = require('fs');
//fs.writeFileSync(path.resolve( __dirname + '/fixtures/expected.json' ), JSON.stringify(schemaCopy, null, 2));
// const fs = require('fs');
// fs.writeFileSync(path.resolve( __dirname + '/fixtures/expected.json' ), JSON.stringify(schemaCopy, null, 2));

t.deepEqual(schemaCopy, fixture);
t.end();
Expand Down
11 changes: 11 additions & 0 deletions test/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,31 @@ module.exports.tests.address_analysis = function(test, common) {
test('name', function(t) {
t.equal(prop.name.type, 'text');
t.equal(prop.name.analyzer, 'keyword');
t.equal(prop.name.search_analyzer, 'keyword');
t.end();
});

// $unit analysis
test('unit', function(t) {
t.equal(prop.unit.type, 'text', 'unit has full text type');
t.equal(prop.unit.analyzer, 'peliasUnit', 'unit analyzer is peliasUnit');
t.equal(prop.unit.search_analyzer, 'peliasUnit', 'unit search_analyzer is peliasUnit');
t.end();
});

// $number analysis is discussed in: https://github.com/pelias/schema/pull/77
test('number', function(t) {
t.equal(prop.number.type, 'text');
t.equal(prop.number.analyzer, 'peliasHousenumber');
t.equal(prop.number.search_analyzer, 'peliasHousenumber');
t.end();
});

// $street analysis is discussed in: https://github.com/pelias/schema/pull/77
test('street', function(t) {
t.equal(prop.street.type, 'text');
t.equal(prop.street.analyzer, 'peliasStreet');
t.equal(prop.street.search_analyzer, 'peliasStreet');
t.end();
});

Expand All @@ -79,6 +83,7 @@ module.exports.tests.address_analysis = function(test, common) {
test('zip', function(t) {
t.equal(prop.zip.type, 'text');
t.equal(prop.zip.analyzer, 'peliasZip');
t.equal(prop.zip.search_analyzer, 'peliasZip');
t.end();
});
};
Expand Down Expand Up @@ -125,12 +130,14 @@ module.exports.tests.parent_analysis = function(test, common) {
t.equal(prop[field].analyzer, 'peliasAdmin', `${field} analyzer is peliasAdmin`);
t.equal(prop[field+'_a'].type, 'text', `${field}_a type is text`);
t.equal(prop[field+'_a'].analyzer, 'peliasAdmin', `${field}_a analyzer is peliasAdmin`);
t.equal(prop[field+'_a'].search_analyzer, 'peliasAdmin', `${field}_a analyzer is peliasAdmin`);
t.equal(prop[field+'_id'].type, 'keyword', `${field}_id type is keyword`);
t.equal(prop[field+'_id'].index, undefined, `${field}_id index left at default`);

// subfields
t.equal(prop[field].fields.ngram.type, 'text', `${field}.ngram type is full text`);
t.equal(prop[field].fields.ngram.analyzer, 'peliasIndexOneEdgeGram', `${field}.ngram analyzer is peliasIndexOneEdgeGram`);
t.equal(prop[field].fields.ngram.search_analyzer, 'peliasAdmin', `${field}.ngram analyzer is peliasIndexOneEdgeGram`);

t.end();
});
Expand All @@ -139,8 +146,10 @@ module.exports.tests.parent_analysis = function(test, common) {
test('postalcode', function(t) {
t.equal(prop['postalcode'].type, 'text', 'postalcode is full text field');
t.equal(prop['postalcode'].analyzer, 'peliasZip', 'postalcode analyzer is peliasZip');
t.equal(prop['postalcode'].search_analyzer, 'peliasZip', 'postalcode analyzer is peliasZip');
t.equal(prop['postalcode'+'_a'].type, 'text', 'postalcode_a is full text field');
t.equal(prop['postalcode'+'_a'].analyzer, 'peliasZip', 'postalcode_a analyzer is peliasZip');
t.equal(prop['postalcode'+'_a'].search_analyzer, 'peliasZip', 'postalcode_a analyzer is peliasZip');
t.equal(prop['postalcode'+'_id'].type, 'keyword', 'postalcode_id field is keyword type');
t.equal(prop['postalcode'+'_id'].index, undefined, 'postalcode_id index left at default');

Expand All @@ -157,6 +166,7 @@ module.exports.tests.dynamic_templates = function(test, common) {
t.equal(template.mapping.type, 'text', 'set to full text type');
t.equal(template.mapping.fielddata, undefined, 'fielddata is left to default (disabled)');
t.equal(template.mapping.analyzer, 'peliasIndexOneEdgeGram', 'analyzer set');
t.equal(template.mapping.search_analyzer, 'peliasQuery', 'search_analyzer set');
t.end();
});
test('dynamic_templates: phrase', function(t) {
Expand All @@ -167,6 +177,7 @@ module.exports.tests.dynamic_templates = function(test, common) {
t.equal(template.mapping.type, 'text', 'set to full text type');
t.equal(template.mapping.fielddata, undefined, 'fielddata is left to default (disabled)');
t.equal(template.mapping.analyzer, 'peliasPhrase', 'analyzer set');
t.equal(template.mapping.search_analyzer, 'peliasQuery', 'search_analyzer set');
t.end();
});
};
Expand Down
Loading