From ea065bb8b269528c1ada34a8bf4696f144fa5abe Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 21 Jun 2022 16:40:39 +0200 Subject: [PATCH] feat(libpostal): discard parse containing single "suburb" label --- controller/libpostal.js | 5 +++ test/unit/controller/libpostal.js | 66 ++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/controller/libpostal.js b/controller/libpostal.js index c2f7cf360..5377a3039 100644 --- a/controller/libpostal.js +++ b/controller/libpostal.js @@ -127,6 +127,11 @@ function patchBuggyResponses(response){ if( response.length === 1 ){ let first = response[0]; + // libpostal classifies some airports as 'suburb' + // when we see a single 'suburb' label, discard it. + // examples: 'john f kennedy international airport', 'soho' + if (first.label === 'suburb') { return []; } + // given only a number, libpostal will attempt to classify it. // if we find a single label which is entirely numeric then it's recast to the // libpostal label 'house', which is mapped to 'query' in our schema. diff --git a/test/unit/controller/libpostal.js b/test/unit/controller/libpostal.js index 1d1881649..43a78ce8d 100644 --- a/test/unit/controller/libpostal.js +++ b/test/unit/controller/libpostal.js @@ -770,7 +770,7 @@ module.exports.tests.bug_fixes = (test, common) => { test('bug fix: recast entirely numeric input - 99', t => { const service = (req, callback) => { callback(null, [{ - 'label': 'suburb', + 'label': 'house_number', 'value': '99' }]); }; @@ -831,6 +831,70 @@ module.exports.tests.bug_fixes = (test, common) => { }); }); + test('bug fix: discard single label of type "suburb"', t => { + const service = (req, callback) => { + callback(null, [{ + 'label': 'suburb', + 'value': 'example' + }]); + }; + const controller = libpostal(service, () => true); + const req = { + clean: { + text: 'example' + }, + errors: [] + }; + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'example', + parser: 'libpostal', + parsed_text: { + // discarded + } + }, + errors: [] + }); + + t.end(); + }); + }); + + test('bug fix: do not discard "suburb" when accompanied by another label', t => { + const service = (req, callback) => { + callback(null, [{ + 'label': 'road', + 'value': 'avenue' + },{ + 'label': 'suburb', + 'value': 'example' + }]); + }; + const controller = libpostal(service, () => true); + const req = { + clean: { + text: 'avenue example' + }, + errors: [] + }; + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'avenue example', + parser: 'libpostal', + parsed_text: { + street: 'avenue', + neighbourhood: 'example' + } + }, + errors: [] + }); + + t.end(); + }); + }); + }; module.exports.all = (tape, common) => {