From 0ed71c534a289ea25e88f9ac4e5095714bd0806f Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 6 Jun 2019 18:50:26 +0200 Subject: [PATCH] feat: autocomplete jitter fixes (#44) * fix: remove unused scheme * fix: improve street_name scheme * feat(CompositeClassifier): improvements to composite classifier, see PR notes * feat(test): add autocomplete jitter test cases --- classifier/CompositeClassifier.js | 16 +++++++++++++++- classifier/scheme/street.js | 15 --------------- classifier/scheme/street_name.js | 6 ++++-- test/address.usa.test.js | 18 ++++++++++++++++++ 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/classifier/CompositeClassifier.js b/classifier/CompositeClassifier.js index 4807856d..be5e8dd0 100644 --- a/classifier/CompositeClassifier.js +++ b/classifier/CompositeClassifier.js @@ -49,6 +49,11 @@ class CompositeClassifier extends SectionClassifier { } each (section) { + let phrases = section.graph.findAll('phrase') + + // sort phrases so shorter phrases are matched first + phrases.sort((a, b) => a.norm.length - b.norm.length) + this.schemes.forEach(s => { // invalid scheme if (!Array.isArray(s.scheme)) { return } @@ -57,7 +62,6 @@ class CompositeClassifier extends SectionClassifier { let candidates = [] // compute candidate lists - let phrases = section.graph.findAll('phrase') candidates = s.scheme.map(s => phrases.filter(this.match.bind(null, s))) // no candidates were found for one or more schemes @@ -79,6 +83,16 @@ class CompositeClassifier extends SectionClassifier { } else if (prev && curr.graph.findOne('child:first').graph.findOne('prev') !== prev.graph.findOne('child:last')) { return false } + + // avoid adding tokens to the front of a street classification + // that begins with a street prefix. + // eg. 'A + Ave B' (ave is both a valid prefix & suffix) + if (next && next.classifications.hasOwnProperty('StreetClassification')) { + let firstChild = next.graph.findOne('child') + if (firstChild && firstChild.classifications.hasOwnProperty('StreetPrefixClassification')) { + return false + } + } } return true }) diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js index 374fbb32..eeb14418 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -303,21 +303,6 @@ module.exports = [ } ] }, - { - // West Main Street - confidence: 0.84, - Class: StreetClassification, - scheme: [ - { - is: ['DirectionalClassification'], - not: ['StreetClassification', 'IntersectionClassification'] - }, - { - is: ['StreetClassification'], - not: ['DirectionalClassification'] - } - ] - }, { // Main Street West confidence: 0.88, diff --git a/classifier/scheme/street_name.js b/classifier/scheme/street_name.js index 0f88b653..0a8b5b7b 100644 --- a/classifier/scheme/street_name.js +++ b/classifier/scheme/street_name.js @@ -7,7 +7,8 @@ module.exports = [ Class: StreetNameClassification, scheme: [ { - is: ['StopWordClassification'] + is: ['StopWordClassification'], + not: ['DirectionalClassification'] }, { is: ['AlphaClassification', 'PersonClassification'], @@ -25,7 +26,8 @@ module.exports = [ not: ['StreetClassification', 'IntersectionClassification', 'StopWordClassification', 'StreetPrefixClassification'] }, { - is: ['StopWordClassification'] + is: ['StopWordClassification'], + not: ['DirectionalClassification'] }, { is: ['AlphaClassification', 'PersonClassification'], diff --git a/test/address.usa.test.js b/test/address.usa.test.js index aa41ed1d..bac0c07b 100644 --- a/test/address.usa.test.js +++ b/test/address.usa.test.js @@ -30,6 +30,24 @@ const testcase = (test, common) => { // postcode not allowed in first position otherwise assert('90210 Foo', []) + + // autocomplete street name jitter + // note: we are only testing the street name stays the same throughout + assert('N FISKE AVE', [{ street: 'N FISKE AVE' }], true) + assert('N FISKE AVE P', [{ street: 'N FISKE AVE' }], true) + assert('N FISKE AVE Po', [{ street: 'N FISKE AVE' }, { region: 'Po' }], true) + assert('N FISKE AVE Por', [{ street: 'N FISKE AVE' }, { region: 'Por' }], true) + assert('N FISKE AVE Port', [{ street: 'N FISKE AVE' }, { locality: 'Port' }], true) + assert('N FISKE AVE Portl', [{ street: 'N FISKE AVE' }], true) + assert('N FISKE AVE Portla', [{ street: 'N FISKE AVE' }], true) + assert('N FISKE AVE Portlan', [{ street: 'N FISKE AVE' }], true) + assert('N FISKE AVE Portland', [{ street: 'N FISKE AVE' }, { locality: 'Portland' }], true) + assert('N DWIGHT AVE Portland O', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }], true) + assert('N DWIGHT AVE Portland Or', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }, { region: 'Or' }], true) + assert('N DWIGHT AVE Portland Ore', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }], true) + assert('N DWIGHT AVE Portland Oreg', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }], true) + assert('N DWIGHT AVE Portland Orego', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }], true) + assert('N DWIGHT AVE Portland Oregon', [{ street: 'N DWIGHT AVE' }, { locality: 'Portland' }, { region: 'Oregon' }], true) } module.exports.all = (tape, common) => {