Skip to content

Commit

Permalink
feat(admin): handle two letter country codes as synonyms
Browse files Browse the repository at this point in the history
Handle autocomplete queries that include a 2-letter country code which is not a prefix of the corresponding 3-letter code, such as `mexico city, mx`. Acceptance tests are in pelias/acceptance-tests#537


Original commits:

* feat(country_code_synonyms): enable country code synonym file for all admin fields

* fix(country_code_synonyms): remove synonyms where the alpha2 is a prefix of the alpha3

* fix(country_code_synonyms): move country_a analysis to its own (newly created) analyzers

* fix(country_code_synonyms): re-enable all country code synonyms

* fix(country_code_synonyms): remove synonyms/country_codes from name_synonyms_multiplexer
  • Loading branch information
missinglink authored Feb 17, 2021
1 parent 3f4883b commit 210a328
Show file tree
Hide file tree
Showing 11 changed files with 1,182 additions and 77 deletions.
350 changes: 350 additions & 0 deletions integration/admin_abbreviations.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,350 @@
const elastictest = require('elastictest');
const config = require('pelias-config').generate();
const getTotalHits = require('./_hits_total_helper');

module.exports.tests = {};

/**
 * these tests ensure that the 'parent.country_a' field (and its
 * 'ngram' subfield) include a synonym mapping for country code
 * abbreviations which maps between alpha2 and alpha3 variants.
 *
 * every test indexes two documents and then asserts that each
 * query matches both of them with an identical score.
 *
 * @param {Function} test - test registration function: test(name, fn)
 * @param {Object} common - shared fixtures; 'clientOpts' and 'create'
 *                          are passed through to elastictest.Suite
 */
module.exports.tests.synonyms = function (test, common) {

  // create a new suite whose first queued action is a short pause
  function createSuite() {
    const suite = new elastictest.Suite(common.clientOpts, common.create);
    suite.action(done => setTimeout(done, 500)); // wait for es to bring some shards up
    return suite;
  }

  // queue an action which indexes a document containing only parent.country_a=[code]
  function indexCountryCode(suite, id, code) {
    suite.action(done => {
      suite.client.index({
        index: suite.props.index,
        type: config.schema.typeName,
        id: id,
        body: {
          parent: {
            country_a: [code]
          }
        }
      }, done);
    });
  }

  // build a match query for 'text' on 'field', optionally naming the query analyzer
  function matchQuery(field, text, analyzer) {
    const params = { query: text };
    if (analyzer) {
      params.analyzer = analyzer;
    }
    return { match: { [field]: params } };
  }

  // queue an assertion which runs 'query' and expects exactly two hits with equal scores
  function assertBothMatchWithEqualScores(t, suite, query) {
    suite.assert(done => {
      suite.client.search({
        index: suite.props.index,
        type: config.schema.typeName,
        searchType: 'dfs_query_then_fetch',
        body: {
          query: query
        }
      }, (err, res) => {
        t.equal(err, undefined);

        // the failure has already been recorded above; skip the remaining
        // checks rather than risk throwing while reading a response that
        // may not contain any hits.
        if (err) {
          return done();
        }

        t.equal(getTotalHits(res.hits), 2, 'matches both documents');
        t.equal(res.hits.hits[0]._score, res.hits.hits[1]._score, 'scores match');
        done();
      });
    });
  }

  // the plain field is queried first, then its ngram subfield
  const FIELDS = ['parent.country_a', 'parent.country_a.ngram'];

  // queue one assertion per (field, text) pair; all texts for a field
  // are queued before moving on to the next field
  function assertSynonymsOnAllFields(t, suite, texts) {
    FIELDS.forEach(field => {
      texts.forEach(text => {
        assertBothMatchWithEqualScores(t, suite, matchQuery(field, text));
      });
    });
  }

  test('synonyms - alpha3 does not share a prefix with alpha2', function (t) {
    const suite = createSuite();

    // index document 1 with country_a='MEX' and document 2 with country_a='MX'
    indexCountryCode(suite, '1', 'MEX');
    indexCountryCode(suite, '2', 'MX');

    // search for 'MEX' and then 'MX' on each field
    assertSynonymsOnAllFields(t, suite, ['mex', 'mx']);

    suite.run(t.end);
  });

  test('synonyms - alpha3 shares a prefix with alpha2', function (t) {
    const suite = createSuite();

    // index document 1 with country_a='NZL' and document 2 with country_a='NZ'
    indexCountryCode(suite, '1', 'NZL');
    indexCountryCode(suite, '2', 'NZ');

    // search for 'NZL' and then 'NZ' on each field
    assertSynonymsOnAllFields(t, suite, ['nzl', 'nz']);

    suite.run(t.end);
  });

  test('synonyms - additional synonyms do not increase field length', function (t) {
    const suite = createSuite();

    // index document 1 with country_a='NZL' and document 2 with country_a='GBR'
    indexCountryCode(suite, '1', 'NZL');
    indexCountryCode(suite, '2', 'GBR');

    // search for 'NZL' or 'GBR' on 'parent.country_a.ngram'
    assertBothMatchWithEqualScores(t, suite, {
      bool: {
        should: [
          matchQuery('parent.country_a.ngram', 'nzl', 'peliasQuery'),
          matchQuery('parent.country_a.ngram', 'gbr', 'peliasQuery')
        ]
      }
    });

    suite.run(t.end);
  });
};

/**
 * Registers every test group defined on module.exports.tests with the
 * supplied tape instance, prefixing each test name so that its output
 * can be attributed to this file.
 *
 * @param {Function} tape - tape-style registration function: tape(name, fn)
 * @param {Object} common - shared fixtures handed to every test group
 */
module.exports.all = (tape, common) => {

  // the prefix names this suite; it was previously a label copied
  // from the multi token synonyms suite.
  function test(name, testFunction) {
    return tape('admin abbreviations: ' + name, testFunction);
  }

  for (const testCase in module.exports.tests) {
    module.exports.tests[testCase](test, common);
  }
};
3 changes: 2 additions & 1 deletion integration/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ var tests = [
require('./autocomplete_street_synonym_expansion.js'),
require('./autocomplete_directional_synonym_expansion.js'),
require('./autocomplete_abbreviated_street_names.js'),
require('./multi_token_synonyms.js')
require('./multi_token_synonyms.js'),
require('./admin_abbreviations.js')
];

tests.map(function(t) {
Expand Down
3 changes: 2 additions & 1 deletion mappings/document.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const admin = require('./partial/admin');
const countryAbbreviation = require('./partial/countryAbbreviation');
const postalcode = require('./partial/postalcode');
const hash = require('./partial/hash');
const multiplier = require('./partial/multiplier');
Expand Down Expand Up @@ -87,7 +88,7 @@ var schema = {

// https://github.com/whosonfirst/whosonfirst-placetypes#country
country: admin,
country_a: admin,
country_a: countryAbbreviation,
country_id: keyword,
country_source: keyword,

Expand Down
15 changes: 15 additions & 0 deletions mappings/partial/countryAbbreviation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"type": "text",
"analyzer": "peliasIndexCountryAbbreviation",
"search_analyzer": "peliasQuery",
"similarity": "peliasDefaultSimilarity",
"fields": {
"ngram": {
"type": "text",
"analyzer": "peliasIndexCountryAbbreviationOneEdgeGram",
"search_analyzer": "peliasQuery",
"similarity": "peliasDefaultSimilarity",
"doc_values": false
}
}
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"devDependencies": {
"difflet": "^1.0.1",
"elastictest": "^3.0.0",
"iso3166-1": "^0.5.1",
"proxyquire": "^2.0.0",
"tap-spec": "^5.0.0",
"tape": "^4.5.0"
Expand Down
Loading

0 comments on commit 210a328

Please sign in to comment.