From b86d260ded31513607acb5976810df5db896d75f Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 12 Mar 2020 10:48:07 +0100 Subject: [PATCH 1/8] feat(sqlite): refactor codebase to use better-sqlite3 client module --- api/extract.js | 19 ++++-- api/near.js | 25 +++++--- api/oa.js | 9 +-- api/osm.js | 9 +-- api/polyline.js | 7 ++- api/search.js | 39 +++++++------ api/street.js | 24 +++++--- api/tiger.js | 9 +-- api/vertices.js | 12 ++-- lib/assert.js | 21 ------- package.json | 3 +- query/attach.js | 12 ---- query/configure.js | 23 +++----- query/extract.js | 82 +++++++++++++++----------- query/indexes.js | 36 +++++------- query/lookup.js | 68 ++++++++++------------ query/near.js | 18 ++---- query/search.js | 73 ++++++++++++++--------- query/street.js | 46 ++++++++++++--- query/tables.js | 98 ++++++++++++++----------------- stream/address/augment.js | 18 +++--- stream/address/import.js | 49 +++++----------- stream/address/lookup.js | 26 +++------ stream/each.js | 84 +++++---------------------- stream/osm/augment.js | 3 - stream/street/import.js | 99 ++++++++++++-------------------- stream/vertices/augment.js | 18 +++--- stream/vertices/lookup.js | 22 ++++--- test/_func.js | 2 +- test/functional/disjoined/run.js | 6 -- test/interface.js | 3 - 31 files changed, 430 insertions(+), 533 deletions(-) delete mode 100644 lib/assert.js delete mode 100644 query/attach.js diff --git a/api/extract.js b/api/extract.js index b78cb7e8..941d2d7c 100644 --- a/api/extract.js +++ b/api/extract.js @@ -1,7 +1,6 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), - pretty = require('../lib/pretty'), query = requireDir('../query'), analyze = require('../lib/analyze'); @@ -9,11 +8,13 @@ var sqlite3 = require('sqlite3'), function setup( addressDbPath, streetDbPath ){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( addressDbPath, sqlite3.OPEN_READONLY ); + var db = new Database( addressDbPath, { + readonly: true, + verbose: console.log + }); // attach street database - query.attach( db, streetDbPath, 'street' ); + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); // query method var q = function( coord, names, cb ){ @@ -34,7 +35,13 @@ function setup( addressDbPath, streetDbPath ){ if( !normalized.length ){ return cb( 'invalid names' ); } // perform a db lookup for the specified street - query.extract( db, point, normalized, cb ); + try { + const rows = query.extract( db, point, normalized ); + cb(null, rows); + } catch (err) { + // an error occurred + return cb(err, null); + } }; // close method to close db diff --git a/api/near.js b/api/near.js index 6d892e85..aa5f0e1f 100644 --- a/api/near.js +++ b/api/near.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), polyline = require('@mapbox/polyline'), requireDir = require('require-dir'), query = requireDir('../query'), @@ -13,12 +13,15 @@ var PRECISION = 6; function setup( streetDbPath ){ // connect to db - sqlite3.verbose(); // @todo: this is required as the query uses the 'street.' 
prefix for tables - var db = new sqlite3.Database( ':memory:', sqlite3.OPEN_READONLY ); + var db = new Database( '/tmp/path', { + memory: true, + readonly: false, + verbose: console.log + }); // attach street database - query.attach( db, streetDbPath, 'street' ); + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); // query method var q = function( coord, cb ){ @@ -32,11 +35,12 @@ function setup( streetDbPath ){ if( isNaN( point.lat ) ){ return cb( 'invalid latitude' ); } if( isNaN( point.lon ) ){ return cb( 'invalid longitude' ); } - // perform a db lookup for nearby streets - query.near( db, point, function( err, res ){ + try { + // perform a db lookup for nearby streets + const res = query.near( db, point ); - // an error occurred or no results were found - if( err || !res || !res.length ){ return cb( err, null ); } + // no results were found + if( !res || !res.length ){ return cb( null, null ); } // decode polylines res.forEach( function( street, i ){ @@ -48,7 +52,10 @@ function setup( streetDbPath ){ // return streets ordered ASC by distance from point cb( null, ordered ); - }); + } catch (err) { + // an error occurred + return cb(err, null); + } }; // return methods diff --git a/api/oa.js b/api/oa.js index f171eff6..2f42b13d 100644 --- a/api/oa.js +++ b/api/oa.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), stream = requireDir('../stream', { recurse: true }), query = requireDir('../query'); @@ -8,12 +8,13 @@ var sqlite3 = require('sqlite3'), function oa(dataStream, addressDbPath, streetDbPath, done){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( process.argv[2] ); + var db = new Database( addressDbPath, { + verbose: console.log + }); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created - query.attach(db, process.argv[3], 'street'); // attach street database + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); dataStream .pipe( stream.oa.parse() ) // parse openaddresses csv data diff --git a/api/osm.js b/api/osm.js index f4114938..c3aabd91 100644 --- a/api/osm.js +++ b/api/osm.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), stream = requireDir('../stream', { recurse: true }), query = requireDir('../query'); @@ -8,12 +8,13 @@ var sqlite3 = require('sqlite3'), function osm(dataStream, addressDbPath, streetDbPath, done){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( process.argv[2] ); + var db = new Database( addressDbPath, { + verbose: console.log + }); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created - query.attach(db, process.argv[3], 'street'); // attach street database + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); dataStream .pipe( stream.split() ) // split file on newline diff --git a/api/polyline.js b/api/polyline.js index 6446c700..26b863ec 100644 --- a/api/polyline.js +++ b/api/polyline.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), stream = requireDir('../stream', { recurse: true }), query = requireDir('../query'); @@ -8,8 +8,9 @@ var sqlite3 = require('sqlite3'), function polyline(dataStream, streetDbPath, done){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database(streetDbPath); + var db = new 
Database(streetDbPath, { + verbose: console.log + }); query.configure(db); // configure database query.tables.street(db, true); // reset database and create tables diff --git a/api/search.js b/api/search.js index e7a86bda..85d27e33 100644 --- a/api/search.js +++ b/api/search.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), query = requireDir('../query'), project = require('../lib/project'), @@ -10,15 +10,17 @@ var sqlite3 = require('sqlite3'), function setup( addressDbPath, streetDbPath ){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( addressDbPath, sqlite3.OPEN_READONLY ); + var db = new Database( addressDbPath, { + readonly: true, + verbose: console.log + }); // attach street database - query.attach( db, streetDbPath, 'street' ); + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); // enable memmapping of database pages - db.run('PRAGMA mmap_size=268435456;'); - db.run('PRAGMA street.mmap_size=268435456;'); + db.exec('PRAGMA mmap_size=268435456;'); + db.exec('PRAGMA street.mmap_size=268435456;'); // query method var q = function( coord, number, street, cb ){ @@ -42,14 +44,16 @@ function setup( addressDbPath, streetDbPath ){ if( isNaN( normalized.number ) ){ return cb( 'invalid number' ); } if( !normalized.street.length ){ return cb( 'invalid street' ); } - // perform a db lookup for the specified street - // @todo: perofmance: only query for part of the table - query.search( db, point, normalized.number, normalized.street, function( err, res ){ + try { + + // perform a db lookup for the specified street + // @todo: perofmance: only query for part of the table + const res = query.search( db, point, normalized.number, normalized.street ); // @note: results can be from multiple different street ids. 
- // an error occurred or no results were found - if( err || !res || !res.length ){ return cb( err, null ); } + // no results were found + if( !res || !res.length ){ return cb( null, null ); } // try to find an exact match var match = res.find( function( row ){ @@ -132,10 +136,10 @@ function setup( addressDbPath, streetDbPath ){ // if distance = 0 then we can simply use either A or B (they are the same lat/lon) // else we interpolate between the two positions - var point = A; + var point2 = A; if( distance > 0 ){ var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber)); - point = geodesic.interpolate( distance, ratio, A, B ); + point2 = geodesic.interpolate( distance, ratio, A, B ); } // return interpolated address @@ -143,10 +147,13 @@ function setup( addressDbPath, streetDbPath ){ type: 'interpolated', source: 'mixed', number: '' + Math.floor( normalized.number ), - lat: parseFloat( project.toDeg( point.lat ).toFixed(7) ), - lon: parseFloat( project.toDeg( point.lon ).toFixed(7) ) + lat: parseFloat( project.toDeg( point2.lat ).toFixed(7) ), + lon: parseFloat( project.toDeg( point2.lon ).toFixed(7) ) }); - }); + } catch (err) { + // an error occurred + return cb(err, null); + } }; // close method to close db diff --git a/api/street.js b/api/street.js index 8f16061e..e709655d 100644 --- a/api/street.js +++ b/api/street.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), query = requireDir('../query'); @@ -7,8 +7,10 @@ var sqlite3 = require('sqlite3'), function setup( streetDbPath ){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( streetDbPath, sqlite3.OPEN_READONLY ); + var db = new Database( streetDbPath, { + readonly: true, + verbose: console.log + }); // query method var q = function( ids, cb ){ @@ -24,15 +26,19 @@ function setup( streetDbPath ){ }); if( fail ){ return cb( 'non-numeric id' ); } - // perform a db lookup for the specified street - query.street( db, ids, function( err, res ){ + try { + // perform a db lookup for the specified street + const res = query.street( db, ids ); - // an error occurred or no results were found - if( err || !res ){ return cb( err, null ); } + // results were found + if( !res ){ return cb( null, null ); } // call callback - cb( err, res ); - }); + cb( null, res ); + } catch (err) { + // an error occurred + return cb(err, null); + } }; // close method to close db diff --git a/api/tiger.js b/api/tiger.js index c37a80e5..cd16a222 100644 --- a/api/tiger.js +++ b/api/tiger.js @@ -1,5 +1,5 @@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), stream = requireDir('../stream', { recurse: true }), query = requireDir('../query'); @@ -8,12 +8,13 @@ var sqlite3 = require('sqlite3'), function tiger(dataStream, addressDbPath, streetDbPath, done){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( process.argv[2] ); + var db = new Database( addressDbPath, { + verbose: console.log + }); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created - query.attach(db, process.argv[3], 'street'); // attach street database + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); dataStream .pipe( stream.tiger.parse() ) // convert tiger data to generic model diff --git a/api/vertices.js b/api/vertices.js index d5ff114d..3755a5db 100644 --- a/api/vertices.js +++ b/api/vertices.js @@ -1,5 +1,5 
@@ -var sqlite3 = require('sqlite3'), +var Database = require('better-sqlite3'), requireDir = require('require-dir'), stream = requireDir('../stream', { recurse: true }), query = requireDir('../query'); @@ -8,14 +8,16 @@ var sqlite3 = require('sqlite3'), function vertices(addressDbPath, streetDbPath, done){ // connect to db - sqlite3.verbose(); - var db = new sqlite3.Database( process.argv[2] ); + var db = new Database(addressDbPath, { + verbose: console.log + }); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created - query.attach(db, process.argv[3], 'street'); // attach street database + db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); - stream.each( db, 'street.polyline', 'WHERE id IN ( SELECT DISTINCT id FROM address )' ) + const sql = `SELECT * FROM street.polyline WHERE id IN (SELECT DISTINCT id FROM address)`; + stream.each(db, sql) .pipe( stream.vertices.lookup( db ) ) .pipe( stream.vertices.augment() ) .pipe( stream.batch( 1000 ) ) // batch up data to import diff --git a/lib/assert.js b/lib/assert.js deleted file mode 100644 index 56e3ba6c..00000000 --- a/lib/assert.js +++ /dev/null @@ -1,21 +0,0 @@ - -/** - a convenience error reporter for sql queries -**/ - -var assert = { transaction: {}, statement: {} }; - -assert.log = function( title ){ - return function( err ){ - if( err ){ console.error( 'sqlite3: ' + title + ': ' + err ); } - }; -}; - -assert.transaction.start = assert.log('BEGIN TRANSACTION'); -assert.transaction.end = assert.log('END TRANSACTION'); -assert.statement.names = assert.log('STATEMENT NAMES'); -assert.statement.rtree = assert.log('STATEMENT RTREE'); -assert.statement.lines = assert.log('STATEMENT LINES'); -assert.statement.address = assert.log('STATEMENT ADDRESS'); - -module.exports = assert; diff --git a/package.json b/package.json index b46b974b..e755fce6 100644 --- a/package.json +++ b/package.json @@ -26,10 +26,12 @@ "dependencies": { "@mapbox/polyline": "^1.0.0", "async": "^3.1.0", + "better-sqlite3": "^6.0.1", "cheerio": "^1.0.0-rc.3", "cli-table3": "^0.5.0", "csv-parse": "^4.4.6", "express": "^4.14.0", + "from2": "^2.3.0", "jsftp": "^2.0.0", "lodash": "^4.17.4", "morgan": "^1.9.0", @@ -41,7 +43,6 @@ "require-dir": "^1.0.0", "serve-index": "^1.8.0", "split2": "^3.0.0", - "sqlite3": "^4.0.0", "superagent": "^5.1.0", "through2": "^3.0.0", "through2-batch": "^1.0.1" diff --git a/query/attach.js b/query/attach.js deleted file mode 100644 index f5a1f79b..00000000 --- a/query/attach.js +++ /dev/null @@ -1,12 +0,0 @@ - -module.exports = function( db, path, name, done ){ - - var sql = 'ATTACH DATABASE \'$path\' as \'$name\';'; - sql = sql.replace( '$path', path ); - sql = sql.replace( '$name', name ); - - db.serialize(function(){ - db.run(sql); - db.wait(done); - }); -}; diff --git a/query/configure.js b/query/configure.js index cbfe97f1..f32d8f04 100644 --- a/query/configure.js +++ b/query/configure.js @@ -1,20 +1,11 @@ // @see: http://sqlite.org/pragma.html -module.exports = function( db, done ){ - db.serialize(function(){ - - // init spatialite extension - // db.run("SELECT InitSpatialMetaData(1);"); // required for mod_spatialite - - db.run('PRAGMA main.foreign_keys=OFF;'); // we don't enforce foreign key constraints - db.run('PRAGMA main.page_size=4096;'); // (default: 1024) - db.run('PRAGMA main.cache_size=-2000;'); // (default: -2000, 2GB) - db.run('PRAGMA main.synchronous=OFF;'); - db.run('PRAGMA main.journal_mode=OFF;'); - db.run('PRAGMA main.temp_store=MEMORY;'); - // 
db.run('VACUUM'); // can cause long delays on subsequent jobs - - db.wait(done); - }); +module.exports = function( db ){ + db.exec('PRAGMA main.foreign_keys=OFF;'); // we don't enforce foreign key constraints + db.exec('PRAGMA main.page_size=4096;'); // (default: 1024) + db.exec('PRAGMA main.cache_size=-2000;'); // (default: -2000, 2GB) + db.exec('PRAGMA main.synchronous=OFF;'); + db.exec('PRAGMA main.journal_mode=OFF;'); + db.exec('PRAGMA main.temp_store=MEMORY;'); }; diff --git a/query/extract.js b/query/extract.js index e24dc68e..e62246a0 100644 --- a/query/extract.js +++ b/query/extract.js @@ -1,50 +1,68 @@ - // maximum names to match on -var MAX_NAMES = 10; +const MAX_NAMES = 10; // maximum address records to return -var MAX_MATCHES = 5000; // note: this query should only be used for debugging purposes +const MAX_MATCHES = 5000; // note: this query should only be used for debugging purposes + +const SQL = ` + SELECT address.* FROM street.rtree + JOIN street.names ON street.names.id = street.rtree.id + JOIN address ON address.id = street.rtree.id + WHERE ( + street.rtree.minX<=$lon AND street.rtree.maxX>=$lon AND + street.rtree.minY<=$lat AND street.rtree.maxY>=$lat + ) + AND ( %%NAME_CONDITIONS%% ) + ORDER BY address.housenumber ASC // @warning business logic depends on this + LIMIT ${MAX_MATCHES}; +`; -var SQL = [ - 'SELECT address.* FROM street.rtree', - 'JOIN street.names ON street.names.id = street.rtree.id', - 'JOIN address ON address.id = street.rtree.id', - 'WHERE (', - 'street.rtree.minX<=?1 AND street.rtree.maxX>=?1 AND', - 'street.rtree.minY<=?2 AND street.rtree.maxY>=?2', - ')', - 'AND ( %%NAME_CONDITIONS%% )', - 'ORDER BY address.housenumber ASC', // @warning business logic depends on this - 'LIMIT %%MAX_MATCHES%%;' -].join(' '); +// SQL prepared statements dont easily support variable length inputs. +// This function dynamically generates a SQL query based on the number +// of 'name' conditions required. +function generateDynamicSQL(max) { + const conditions = new Array(max.names) + .fill('(street.names.name=$name)') + .map((sql, pos) => sql.replace('$name', `$name${pos}`)); -var NAME_SQL = '(street.names.name=?)'; + return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); +} + +// Reusing prepared statements can have a ~10% perf benefit +// Note: the cache is global and so must be unique per database. +const cache = []; +function statementCache(db, max) { + const key = `${max.names}:${db.name}`; + if (!cache[key]) { + cache[key] = db.prepare(generateDynamicSQL(max)); + } + return cache[key]; +} -module.exports = function( db, point, names, cb ){ +module.exports = function( db, point, names ){ // error checking if( !names || !names.length ){ - return cb( null, [] ); + return []; } // max conditions to search on - var max = { names: Math.min( names.length, MAX_NAMES ) }; + const max = { names: Math.min( names.length, MAX_NAMES ) }; - // use named parameters to avoid sending coordinates twice for rtree conditions - var position = 3; // 1 and 2 are used by lon and lat. + // use a prepared statement from cache (or generate one if not yet cached) + const stmt = statementCache(db, max); - // add name conditions to query - var nameConditions = Array.apply(null, new Array(max.names)).map( function(){ - return NAME_SQL.replace('?', '?' 
+ position++); - }); - - // build unique sql statement - var sql = SQL.replace( '%%NAME_CONDITIONS%%', nameConditions.join(' OR ') ) - .replace( '%%MAX_MATCHES%%', MAX_MATCHES ); + // query params + const params = { + lon: point.lon, + lat: point.lat, + }; - // create a variable array of params for the query - var params = [ point.lon, point.lat ].concat( names.slice(0, max.names) ); + // each name is added in the format: $name0=x, $name1=y + names.slice(0, max.names).forEach((name, pos) => { + params[`name${pos}`] = name; + }); // execute query - db.all( sql, params, cb ); + return stmt.all(params); }; diff --git a/query/indexes.js b/query/indexes.js index b4a21f6b..51d5d05a 100644 --- a/query/indexes.js +++ b/query/indexes.js @@ -1,29 +1,19 @@ -module.exports.street = function( db, done ){ - db.serialize(function(){ - - // names - db.run('CREATE INDEX IF NOT EXISTS names_id_idx ON names(id);'); - db.run('CREATE INDEX IF NOT EXISTS names_name_idx ON names(name, id);'); - - db.wait(done); - }); +module.exports.street = function( db ){ + // names + db.exec('CREATE INDEX IF NOT EXISTS names_id_idx ON names(id);'); + db.exec('CREATE INDEX IF NOT EXISTS names_name_idx ON names(name, id);'); }; -module.exports.address = function( db, done ){ - db.serialize(function(){ - - // create an index on street id - db.run('CREATE INDEX IF NOT EXISTS address_id_idx ON address(id);'); - - // create an unique index on housenumber, this ensures we only store copy of each - // db.run('CREATE UNIQUE INDEX IF NOT EXISTS housenumber_uniq_idx ON address(housenumber);'); +module.exports.address = function( db ){ + // create an index on street id + db.exec('CREATE INDEX IF NOT EXISTS address_id_idx ON address(id);'); - // these indices are not strictly required and consume a large amount of disk space - // db.run('CREATE INDEX IF NOT EXISTS address_source_idx ON address(source);'); - // db.run('CREATE INDEX IF NOT EXISTS address_parity_idx ON address(parity);'); - // db.run('CREATE INDEX IF NOT EXISTS address_housenumber_idx ON address(housenumber);'); + // create an unique index on housenumber, this ensures we only store copy of each + // db.exec('CREATE UNIQUE INDEX IF NOT EXISTS housenumber_uniq_idx ON address(housenumber);'); - db.wait(done); - }); + // these indices are not strictly required and consume a large amount of disk space + // db.exec('CREATE INDEX IF NOT EXISTS address_source_idx ON address(source);'); + // db.exec('CREATE INDEX IF NOT EXISTS address_parity_idx ON address(parity);'); + // db.exec('CREATE INDEX IF NOT EXISTS address_housenumber_idx ON address(housenumber);'); }; diff --git a/query/lookup.js b/query/lookup.js index a2992843..68e40415 100644 --- a/query/lookup.js +++ b/query/lookup.js @@ -1,33 +1,32 @@ - // maximum names to match on -var MAX_NAMES = 10; +const MAX_NAMES = 10; // maximum points to match on -var MAX_POINTS = 4; +const MAX_POINTS = 4; // maximum street segments to return -var MAX_MATCHES = 5; +const MAX_MATCHES = 5; -var SQL = [ - 'SELECT street.polyline.id, street.polyline.line FROM street.polyline', - 'JOIN street.rtree ON street.rtree.id = street.polyline.id', - 'JOIN street.names ON street.names.id = street.rtree.id', - 'WHERE ( %%POINT_CONDITIONS%% )', - 'AND ( %%NAME_CONDITIONS%% )', - 'LIMIT %%MAX_MATCHES%%;' -].join(' '); +const SQL = ` + SELECT street.polyline.id, street.polyline.line FROM street.polyline + JOIN street.rtree ON street.rtree.id = street.polyline.id + JOIN street.names ON street.names.id = street.rtree.id + WHERE ( %%POINT_CONDITIONS%% ) + AND ( 
%%NAME_CONDITIONS%% ) + LIMIT ${MAX_MATCHES} +`; -var POINT_SQL = '(street.rtree.minX?B AND street.rtree.minY?D)'; -var NAME_SQL = '(street.names.name=?)'; +const POINT_SQL = '(street.rtree.minX<$lon AND street.rtree.maxX>$lon AND street.rtree.minY<$lat AND street.rtree.maxY>$lat)'; +const NAME_SQL = '(street.names.name=$name)'; // sqlite3 prepared statements var stmt = {}; -module.exports = function( db, names, points, cb ){ +module.exports = function( db, names, points ){ // error checking if( !names || !names.length || !points || !points.length ){ - return cb( null, [] ); + return []; } // max conditions to search on @@ -42,48 +41,39 @@ module.exports = function( db, names, points, cb ){ // create prepared statement if one doesn't exist if( !stmt.hasOwnProperty( hash ) ){ - // use named parameters to avoid sending coordinates twice for rtree conditions - var position = 1; - // add point confitions to query - var pointConditions = Array.apply(null, new Array(max.points)).map(function(){ - return POINT_SQL.replace('?A', '?' + position) - .replace('?B', '?' + position++) - .replace('?C', '?' + position) - .replace('?D', '?' + position++); + var pointConditions = Array.apply(null, new Array(max.points)).map(function(__, i){ + return POINT_SQL.replace(/\$lon/g, `$point${i}x`) + .replace(/\$lat/g, `$point${i}y`); }); // add name conditions to query - var nameConditions = Array.apply(null, new Array(max.names)).map( function(){ - return NAME_SQL.replace('?', '?' + position++); + var nameConditions = Array.apply(null, new Array(max.names)).map(function(__, i){ + return NAME_SQL.replace('$name', `$name${i}`); }); // build unique sql statement var sql = SQL.replace( '%%NAME_CONDITIONS%%', nameConditions.join(' OR ') ) - .replace( '%%POINT_CONDITIONS%%', pointConditions.join(' OR ') ) - .replace( '%%MAX_MATCHES%%', MAX_MATCHES ); + .replace( '%%POINT_CONDITIONS%%', pointConditions.join(' OR ') ); // create new prepared statement stmt[hash] = db.prepare( sql ); } // create a variable array of args to bind to query - var args = []; + var args = {}; // add points - points.slice(0, max.points).forEach( function( point ){ - args.push( point.lon, point.lat ); + points.slice(0, max.points).forEach( function( point, i ){ + args[`point${i}x`] = point.lon; + args[`point${i}y`] = point.lat; }); // add names and callback - args = args.concat( names.slice(0, max.names), cb ); + names.slice(0, max.names).forEach(( name, i ) => { + args[`name${i}`] = name; + }); // execute statement - stmt[hash].all.apply(stmt[hash], args); -}; - -module.exports.finalize = function(){ - for( var hash in stmt ){ - stmt[hash].finalize(); - } + return stmt[hash].all(args); }; diff --git a/query/near.js b/query/near.js index b235eecc..1bfd2666 100644 --- a/query/near.js +++ b/query/near.js @@ -18,21 +18,15 @@ var SQL = [ // sqlite3 prepared statements var stmt; -module.exports = function( db, point, cb ){ +module.exports = function( db, point ){ // create prepared statement if one doesn't exist if( !stmt ){ stmt = db.prepare( SQL ); } // execute statement - stmt.all({ - $LON: point.lon, - $LAT: point.lat, - $LIMIT: MAX_MATCHES - }, cb); -}; - -module.exports.finalize = function(){ - if( stmt ){ - stmt.finalize(); - } + return stmt.all({ + LON: point.lon, + LAT: point.lat, + LIMIT: MAX_MATCHES + }); }; diff --git a/query/search.js b/query/search.js index dbb137a7..155cacd2 100644 --- a/query/search.js +++ b/query/search.js @@ -1,9 +1,8 @@ - // maximum names to match on -var MAX_NAMES = 10; +const MAX_NAMES = 10; // maximum address 
records to return -var MAX_MATCHES = 20; +const MAX_MATCHES = 20; /** this query should only ever return max 3 rows. @@ -11,7 +10,7 @@ var MAX_MATCHES = 20; exact match was found or not. **/ -var SQL = [ +const SQL = [ 'WITH base AS (', 'SELECT id, housenumber, rowid', 'FROM address', @@ -22,8 +21,8 @@ var SQL = [ 'SELECT id', 'FROM street.rtree', 'WHERE (', - 'street.rtree.minX<=?1 AND street.rtree.maxX>=?1 AND', - 'street.rtree.minY<=?2 AND street.rtree.maxY>=?2', + 'street.rtree.minX<=$lon AND street.rtree.maxX>=$lon AND', + 'street.rtree.minY<=$lat AND street.rtree.maxY>=$lat', ')', ')', 'AND ( %%NAME_CONDITIONS%% )', @@ -33,50 +32,68 @@ var SQL = [ 'WHERE rowid IN (', 'SELECT rowid FROM (', 'SELECT * FROM base', - 'WHERE housenumber < "%%TARGET_HOUSENUMBER%%"', + 'WHERE housenumber < $housenumber', 'GROUP BY id HAVING( MAX( housenumber ) )', 'ORDER BY housenumber DESC', ')', 'UNION', 'SELECT rowid FROM (', 'SELECT * FROM base', - 'WHERE housenumber >= "%%TARGET_HOUSENUMBER%%"', + 'WHERE housenumber >= $housenumber', 'GROUP BY id HAVING( MIN( housenumber ) )', 'ORDER BY housenumber ASC', ')', ')', 'ORDER BY housenumber ASC', // @warning business logic depends on this - 'LIMIT %%MAX_MATCHES%%;' + `LIMIT ${MAX_MATCHES};` ].join(' '); -var NAME_SQL = '(street.names.name=?)'; +// SQL prepared statements dont easily support variable length inputs. +// This function dynamically generates a SQL query based on the number +// of 'name' conditions required. +function generateDynamicSQL(max){ + const conditions = new Array(max.names) + .fill('(street.names.name=$name)') + .map((sql, pos) => sql.replace('$name', `$name${pos}`)); + + return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); +} -module.exports = function( db, point, number, names, cb ){ +// Reusing prepared statements can have a ~10% perf benefit +// Note: the cache is global and so must be unique per database. +const cache = []; +function statementCache(db, max){ + const key = `${max.names}:${db.name}`; + if (!cache[key]) { + cache[key] = db.prepare(generateDynamicSQL(max)); + } + return cache[key]; +} +module.exports = function( db, point, number, names ){ // error checking if( !names || !names.length ){ - return cb( null, [] ); + return []; } // max conditions to search on - var max = { names: Math.min( names.length, MAX_NAMES ) }; + const max = { names: Math.min( names.length, MAX_NAMES ) }; - // use named parameters to avoid sending coordinates twice for rtree conditions - var position = 3; // 1 and 2 are used by lon and lat. + // use a prepared statement from cache (or generate one if not yet cached) + const stmt = statementCache(db, max); - // add name conditions to query - var nameConditions = Array.apply(null, new Array(max.names)).map( function(){ - return NAME_SQL.replace('?', '?' 
+ position++); - }); - - // build unique sql statement - var sql = SQL.replace( '%%NAME_CONDITIONS%%', nameConditions.join(' OR ') ) - .replace( '%%MAX_MATCHES%%', MAX_MATCHES ) - .split( '%%TARGET_HOUSENUMBER%%' ).join( number ); + // query params + const params = { + lon: point.lon, + lat: point.lat, + housenumber: number + }; - // create a variable array of params for the query - var params = [ point.lon, point.lat ].concat( names.slice(0, max.names) ); + // each name is added in the format: $name0=x, $name1=y + names.slice(0, max.names).forEach((name, pos) => { + params[`name${pos}`] = name; + }); // execute query - db.all( sql, params, cb ); -}; \ No newline at end of file + return stmt.all(params); +}; diff --git a/query/street.js b/query/street.js index 4691dc61..9114462e 100644 --- a/query/street.js +++ b/query/street.js @@ -1,13 +1,43 @@ +const SQL = ` + SELECT * FROM polyline + JOIN names ON polyline.id = names.id + WHERE polyline.id IN ( %%IDS%% ) + LIMIT 10 +`; -var SQL = [ - 'SELECT * FROM polyline', - 'JOIN names ON polyline.id = names.id', - 'WHERE polyline.id IN ( %%IDS%% )', - 'LIMIT 10;' -].join(' '); +// SQL prepared statements dont easily support variable length inputs. +// This function dynamically generates a SQL query based on the number +// of 'id' conditions required. +function generateDynamicSQL(max) { + const conditions = new Array(max.ids) + .fill('$id') + .map((sql, pos) => sql.replace('$id', `$id${pos}`)); -module.exports = function( db, ids, cb ){ + return SQL.replace('%%IDS%%', conditions.join(',')); +} + +// Reusing prepared statements can have a ~10% perf benefit +// Note: the cache is global and so must be unique per database. +const cache = []; +function statementCache(db, max) { + const key = `${max.ids}:${db.name}`; + if (!cache[key]) { + cache[key] = db.prepare(generateDynamicSQL(max)); + } + return cache[key]; +} + +module.exports = function( db, ids ){ + const stmt = statementCache(db, { ids: ids.length }); + + // query params + const params = {}; + + // each name is added in the format: $id0=x, $id1=y + ids.forEach((id, pos) => { + params[`id${pos}`] = id; + }); // execute statement - db.all( SQL.replace( '%%IDS%%', ids.join(',') ), cb ); + return stmt.all(params); }; diff --git a/query/tables.js b/query/tables.js index 3a1090b6..065e95d2 100644 --- a/query/tables.js +++ b/query/tables.js @@ -1,58 +1,48 @@ -module.exports.street = function( db, rebuild, done ){ - db.serialize(function(){ - - // create rtree table - if( rebuild ){ db.run('DROP TABLE IF EXISTS rtree;'); } - db.run([ - 'CREATE VIRTUAL TABLE IF NOT EXISTS rtree', - 'USING rtree(id, minX, maxX, minY, maxY);' - ].join(' ')); - - // create names table - if( rebuild ){ db.run('DROP TABLE IF EXISTS names;'); } - db.run([ - 'CREATE TABLE IF NOT EXISTS names', - '(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT);' - ].join(' ')); - - // create fts table - // if( rebuild ){ db.run('DROP TABLE IF EXISTS names;'); } - // db.run([ - // 'CREATE VIRTUAL TABLE IF NOT EXISTS names', - // 'USING fts4(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT, notindexed=id, tokenize=simple);' - // ].join(' ')); - - // create polyline table - if( rebuild ){ db.run('DROP TABLE IF EXISTS polyline;'); } - db.run([ - 'CREATE TABLE IF NOT EXISTS polyline', - '(id INTEGER PRIMARY KEY, line TEXT);' - ].join(' ')); - - // create geometry table - // if( rebuild ){ db.run('DROP TABLE IF EXISTS geometry;'); } - // db.run('CREATE TABLE IF NOT EXISTS geometry (id INTEGER PRIMARY KEY);'); - // if( rebuild ){ db.run('SELECT 
AddGeometryColumn('geometry', 'geometry', 4326, 'LINESTRING', 'xy', 1);'); } - - db.wait(done); - }); +module.exports.street = function( db, rebuild ){ + // create rtree table + if( rebuild ){ db.exec('DROP TABLE IF EXISTS rtree;'); } + db.exec([ + 'CREATE VIRTUAL TABLE IF NOT EXISTS rtree', + 'USING rtree(id, minX, maxX, minY, maxY);' + ].join(' ')); + + // create names table + if( rebuild ){ db.exec('DROP TABLE IF EXISTS names;'); } + db.exec([ + 'CREATE TABLE IF NOT EXISTS names', + '(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT);' + ].join(' ')); + + // create fts table + // if( rebuild ){ db.exec('DROP TABLE IF EXISTS names;'); } + // db.exec([ + // 'CREATE VIRTUAL TABLE IF NOT EXISTS names', + // 'USING fts4(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT, notindexed=id, tokenize=simple);' + // ].join(' ')); + + // create polyline table + if( rebuild ){ db.exec('DROP TABLE IF EXISTS polyline;'); } + db.exec([ + 'CREATE TABLE IF NOT EXISTS polyline', + '(id INTEGER PRIMARY KEY, line TEXT);' + ].join(' ')); + + // create geometry table + // if( rebuild ){ db.exec('DROP TABLE IF EXISTS geometry;'); } + // db.exec('CREATE TABLE IF NOT EXISTS geometry (id INTEGER PRIMARY KEY);'); + // if( rebuild ){ db.exec('SELECT AddGeometryColumn('geometry', 'geometry', 4326, 'LINESTRING', 'xy', 1);'); } }; -module.exports.address = function( db, rebuild, done ){ - db.serialize(function(){ - - // create address table - if( rebuild ){ db.run('DROP TABLE IF EXISTS address;'); } - db.run([ - 'CREATE TABLE IF NOT EXISTS address', - '(', - 'rowid INTEGER PRIMARY KEY, id INTEGER, source TEXT, source_id TEXT, housenumber REAL,', - 'lat REAL, lon REAL, parity TEXT, proj_lat REAL, proj_lon REAL,', - 'UNIQUE( id, housenumber ) ON CONFLICT IGNORE', - ');' - ].join(' ')); - - db.wait(done); - }); +module.exports.address = function( db, rebuild ){ + // create address table + if( rebuild ){ db.exec('DROP TABLE IF EXISTS address;'); } + db.exec([ + 'CREATE TABLE IF NOT EXISTS address', + '(', + 'rowid INTEGER PRIMARY KEY, id INTEGER, source TEXT, source_id TEXT, housenumber REAL,', + 'lat REAL, lon REAL, parity TEXT, proj_lat REAL, proj_lon REAL,', + 'UNIQUE( id, housenumber ) ON CONFLICT IGNORE', + ');' + ].join(' ')); }; diff --git a/stream/address/augment.js b/stream/address/augment.js index c50ab38f..6f58f9d9 100644 --- a/stream/address/augment.js +++ b/stream/address/augment.js @@ -57,15 +57,15 @@ function streamFactory(db, done){ // push openaddresses values to db this.push({ - $id: nearest.street.id, - $source: address.getSource(), - $source_id: address.getId(), - $housenumber: housenumber, - $lon: point[0], - $lat: point[1], - $parity: parity, - $proj_lon: nearest.proj.point[0], - $proj_lat: nearest.proj.point[1] + id: nearest.street.id, + source: address.getSource(), + source_id: address.getId(), + housenumber: housenumber, + lon: point[0], + lat: point[1], + parity: parity, + proj_lon: nearest.proj.point[0], + proj_lat: nearest.proj.point[1] }); }, this); diff --git a/stream/address/import.js b/stream/address/import.js index 37c4259b..8db512b2 100644 --- a/stream/address/import.js +++ b/stream/address/import.js @@ -1,6 +1,5 @@ var through = require('through2'), - assert = require('../../lib/assert'), Statistics = require('../../lib/statistics'); function streamFactory(db, done){ @@ -20,52 +19,30 @@ function streamFactory(db, done){ // create a new stream return through.obj({ highWaterMark: 2 }, function( batch, _, next ){ - // run serially so we can use transactions - db.serialize(function() { + // 
start transaction + db.transaction(() => { - // start transaction - db.run('BEGIN TRANSACTION', function(err){ + // import batch + batch.forEach( function( address ){ - // error checking - assert.transaction.start(err); - - // import batch - batch.forEach( function( address ){ - - // insert points in address table - stmt.address.run(address, assert.statement.address); - }); + // insert points in address table + stmt.address.run(address); }); + })(); - // commit transaction - db.run('END TRANSACTION', function(err){ + // update statistics + stats.inc( batch.length ); - // error checking - assert.transaction.end(err); - - // update statistics - stats.inc( batch.length ); - - // wait for transaction to complete before continuing - next(); - }); - }); + // wait for transaction to complete before continuing + next(); }, function( next ){ // stop stats ticker stats.tick( false ); - // clean up - db.serialize(function(){ - - // finalize prepared statements - stmt.address.finalize( assert.log('finalize address') ); - - // we are done - db.wait(done); - next(); - }); + done(); + next(); }); } diff --git a/stream/address/lookup.js b/stream/address/lookup.js index e764d96f..aee3acc4 100644 --- a/stream/address/lookup.js +++ b/stream/address/lookup.js @@ -40,16 +40,10 @@ function streamFactory(db){ // select points to search on var points = selectPoints( batch ); - // console.error( points ); + try { - // call db.all(), appending the callback function - query.lookup(db, names, points, function( err, rows ){ - - // error debug - if( err ){ - console.error( err ); - return next(); - } + // console.error( points ); + let rows = query.lookup(db, names, points ); // no results found if( !rows || !rows.length ){ @@ -83,18 +77,16 @@ function streamFactory(db){ **/ // push downstream - this.push({ + next(null, { batch: batch, - streets: ( longLinesOnly.length > 1 ) ? longLinesOnly : rows + streets: (longLinesOnly.length > 1) ? longLinesOnly : rows }); + } catch (err) { + console.error(err); next(); - }.bind(this)); - - }, function flush(next){ - query.lookup.finalize(); - next(); - }); + } + }.bind(this)); } /** diff --git a/stream/each.js b/stream/each.js index 35c9f39c..c1954563 100644 --- a/stream/each.js +++ b/stream/each.js @@ -1,78 +1,24 @@ - -var through = require('through2'); +const from = require('from2'); /** query for each row in a table; one by one (with stream backpressure). **/ -function streamFactory( db, table, condition ){ - - // create prepared statement - var stmt = db.prepare( 'SELECT * FROM ' + table + ' WHERE id = ?' ); - var readOne; - - // create a passthrough stream which also requests a new record from the - // database after it passed the previous one downstream. 
- var stream = through.obj( function( row, _, next ){ - this.push( row ); - readOne(); - next(); - }, function flush( next ){ - stmt.finalize(); // finalize prepared statement - next(); - }); - - db.serialize( function(){ - - var cur = 1; // current row number - var max = 0; // maximum rowid in table - - // read the next row from the table - readOne = function(){ - - // reached last record in table - if( cur > max ){ return stream.end(); } - - // get record by id - stmt.get([ cur++ ], function( err, row ){ - - // an error occurred - if( err ){ console.error( err ); } - - // id not found in table (possibly a deleted record?); continue to the next id - if( !row ){ return readOne(); } - - // write row on to stream - stream.write( row ); - }); - }; - - // calculate the highest rowid in the table - - var sql = 'SELECT MAX( rowid ) as max, MIN( rowid ) as min FROM ' + table; - if( 'string' === typeof condition && condition.length ){ - sql += ' ' + condition; - } - - db.get( sql, function( err, row ){ - - // an error occurred - if( err ){ return console.error( err ); } - if( !row || !row.max || !row.min ){ - stream.end(); - return console.error( 'no rows in table', table ); +function streamFactory(db, sql) { + const stmt = db.prepare(sql); + const iterator = stmt.iterate(); + + return from.obj((size, next) => { + var ok = true; + while (ok) { + const elt = iterator.next(); + if (!elt.done) { + ok = next(null, elt.value); + } else { + next(null, null); + break; } - - // minumum/maximum rowid in table - max = parseInt( row.max, 10 ); - cur = parseInt( row.min, 10 ); - - // kick off iteration - readOne(); - }); - + } }); - - return stream; } module.exports = streamFactory; diff --git a/stream/osm/augment.js b/stream/osm/augment.js index 3e343f93..2dd23829 100644 --- a/stream/osm/augment.js +++ b/stream/osm/augment.js @@ -73,9 +73,6 @@ function streamFactory( db ){ next(); }.bind(this)); - }, function flush(next){ - query.near.finalize(); - next(); }); } diff --git a/stream/street/import.js b/stream/street/import.js index a3c83d72..e4652113 100644 --- a/stream/street/import.js +++ b/stream/street/import.js @@ -1,9 +1,8 @@ -var through = require('through2'), - assert = require('../../lib/assert'), - Statistics = require('../../lib/statistics'); +const through = require('through2'); +const Statistics = require('../../lib/statistics'); -function streamFactory(db, done){ +function streamFactory(db, done) { // sqlite3 prepared statements var stmt = { @@ -17,78 +16,54 @@ function streamFactory(db, done){ stats.tick(); // create a new stream - return through.obj(function( batch, _, next ){ + return through.obj(function (batch, _, next) { - // run serially so we can use transactions - db.serialize(function() { + // start transactio + db.transaction(() => { - // start transaction - db.run('BEGIN TRANSACTION', function(err){ + // import batch + batch.forEach(function (street) { - // error checking - assert.transaction.start(err); - - // import batch - batch.forEach( function( street ){ - - // insert names in to lookup table - street.getNames().forEach( function( name ){ - stmt.names.run({ - $id: street.getId(), - $name: name - }, assert.statement.names); + // insert names in to lookup table + street.getNames().forEach(function (name) { + stmt.names.run({ + id: street.getId(), + name: name }); - - // insert bbox in to rtree table - var bbox = street.getBbox(); - stmt.rtree.run({ - $id: street.getId(), - $minX: bbox.minX, - $maxX: bbox.maxX, - $minY: bbox.minY, - $maxY: bbox.maxY - }, assert.statement.rtree); - 
- // insert line in to polyline table - stmt.line.run({ - $id: street.getId(), - $line: street.getEncodedPolyline() - }, assert.statement.line); - }); - }); - // commit transaction - db.run('END TRANSACTION', function(err){ - - // error checking - assert.transaction.end(err); + // insert bbox in to rtree table + var bbox = street.getBbox(); + stmt.rtree.run({ + id: street.getId(), + minX: bbox.minX, + maxX: bbox.maxX, + minY: bbox.minY, + maxY: bbox.maxY + }); - // update statistics - stats.inc( batch.length ); + // insert line in to polyline table + stmt.line.run({ + id: street.getId(), + line: street.getEncodedPolyline() + }); - // wait for transaction to complete before continuing - next(); }); - }); + }).deferred(); - }, function( next ){ + // update statistics + stats.inc(batch.length); - // stop stats ticker - stats.tick( false ); + // wait for transaction to complete before continuing + next(); - // clean up - db.serialize(function(){ + }, function (next) { - // finalize prepared statements - stmt.rtree.finalize( assert.log('finalize rtree') ); - stmt.names.finalize( assert.log('finalize names') ); - stmt.line.finalize( assert.log('finalize line') ); + // stop stats ticker + stats.tick(false); - // we are done - db.wait(done); - next(); - }); + done(); + next(); }); } diff --git a/stream/vertices/augment.js b/stream/vertices/augment.js index 9deab19c..e814f7d8 100644 --- a/stream/vertices/augment.js +++ b/stream/vertices/augment.js @@ -131,15 +131,15 @@ function streamFactory(db, done){ housenumbers.forEach( function( num ){ if( !num ){ return; } // skip null interpolations this.push({ - $id: data.street.id, - $source: 'VERTEX', - $source_id: undefined, - $housenumber: num.toFixed(3), - $lon: undefined, - $lat: undefined, - $parity: undefined, - $proj_lon: vertex[0], - $proj_lat: vertex[1] + id: data.street.id, + source: 'VERTEX', + source_id: undefined, + housenumber: num.toFixed(3), + lon: undefined, + lat: undefined, + parity: undefined, + proj_lon: vertex[0], + proj_lat: vertex[1] }); }, this); diff --git a/stream/vertices/lookup.js b/stream/vertices/lookup.js index 9696f2b9..187dfcf5 100644 --- a/stream/vertices/lookup.js +++ b/stream/vertices/lookup.js @@ -11,18 +11,16 @@ var through = require('through2'); function streamFactory( db ){ // create prepared statement - var stmt = db.prepare( 'SELECT * FROM address WHERE source != "VERTEX" AND id = ? 
ORDER BY housenumber ASC' ); + var stmt = db.prepare( `SELECT * FROM address WHERE source != 'VERTEX' AND id = $id ORDER BY housenumber ASC` ); return through.obj( function( street, _, next ){ - // select all addresses which correspond to street.id (excluding existing vertices) - stmt.all([ street.id ], function( err, addresses ){ - - // an error occurred - if( err ){ console.error( err ); } + try { + // select all addresses which correspond to street.id (excluding existing vertices) + const addresses = stmt.all({ id: street.id }); // push street and addresses downstream - else if( addresses && addresses.length ){ + if( addresses && addresses.length ){ this.push({ street: street, addresses: addresses @@ -31,12 +29,12 @@ function streamFactory( db ){ // continue next(); + } catch(err){ + // an error occurred + console.error(err); + next(); + } - }.bind(this) ); - - }, function flush( next ){ - stmt.finalize(); // finalize prepared statement - next(); }); } diff --git a/test/_func.js b/test/_func.js index c6178d37..f8a4dba6 100644 --- a/test/_func.js +++ b/test/_func.js @@ -11,7 +11,7 @@ var tests = [ require('./functional/potsdamerplatz/run.js'), require('./functional/willow_ave/run.js'), require('./functional/nevern_square/run.js'), - require('./functional/cemetery_rd/run.js'), + require('./functional/cemetery_rd/run.js') ]; tests.map(function(t) { diff --git a/test/functional/disjoined/run.js b/test/functional/disjoined/run.js index beb065ac..8fecff81 100644 --- a/test/functional/disjoined/run.js +++ b/test/functional/disjoined/run.js @@ -163,12 +163,6 @@ module.exports.functional.search_north = function(test) { t.end(); }); }); - - test('close connection', function(t) { - conn.close(); - t.pass(); - t.end(); - }); }; module.exports.functional.spotcheck_south = function(test) { diff --git a/test/interface.js b/test/interface.js index b262117a..eff51038 100644 --- a/test/interface.js +++ b/test/interface.js @@ -54,7 +54,6 @@ module.exports.interface.query = function(test) { test('queries', function(t) { t.equal(typeof index.query.extract, 'function', 'valid function'); t.equal(typeof index.query.search, 'function', 'valid function'); - t.equal(typeof index.query.attach, 'function', 'valid function'); t.equal(typeof index.query.configure, 'function', 'valid function'); t.equal(typeof index.query.indexes, 'object', 'valid object'); t.equal(typeof index.query.indexes.street, 'function', 'valid function'); @@ -72,8 +71,6 @@ module.exports.interface.lib = function(test) { t.equal(typeof index.lib.analyze, 'object', 'valid object'); t.equal(typeof index.lib.analyze.street, 'function', 'valid function'); t.equal(typeof index.lib.analyze.housenumber, 'function', 'valid function'); - t.equal(typeof index.lib.assert, 'object', 'valid object'); - t.equal(typeof index.lib.assert.log, 'function', 'valid function'); t.equal(typeof index.lib.proximity, 'object', 'valid object'); t.equal(typeof index.lib.proximity.nearest.street, 'function', 'valid function'); t.equal(typeof index.lib.pretty, 'object', 'valid object'); From 1b449a110550db9119c1d0edd3144f5b486bf986 Mon Sep 17 00:00:00 2001 From: missinglink Date: Mon, 16 Mar 2020 10:33:30 +0100 Subject: [PATCH 2/8] feat(sqlite): use prepared statment cache --- api/extract.js | 13 ++--- api/near.js | 20 +++----- api/oa.js | 13 ++--- api/osm.js | 13 ++--- api/polyline.js | 13 ++--- api/search.js | 26 ++++------ api/street.js | 12 ++--- api/tiger.js | 9 ++-- api/vertices.js | 9 ++-- cmd/extract.js | 5 +- cmd/oa.js | 6 +-- cmd/osm.js | 6 +-- cmd/polyline.js | 
6 +-- cmd/search.js | 11 ++-- cmd/server.js | 26 ++++------ cmd/tiger.js | 6 +-- cmd/vertices.js | 3 +- query/configure.js | 2 +- query/extract.js | 23 ++++----- query/indexes.js | 19 ++----- query/lookup.js | 84 +++++++++++++++---------------- query/near.js | 26 +++++----- query/search.js | 100 +++++++++++++++++++------------------ query/street.js | 27 ++++------ query/tables.js | 68 +++++++++++-------------- stream/address/import.js | 9 ++-- stream/address/lookup.js | 2 - stream/street/augment.js | 72 +++++++++++++------------- stream/street/import.js | 84 +++++++++++++++++-------------- stream/vertices/augment.js | 1 - 30 files changed, 320 insertions(+), 394 deletions(-) diff --git a/api/extract.js b/api/extract.js index 941d2d7c..4e9f187b 100644 --- a/api/extract.js +++ b/api/extract.js @@ -1,17 +1,12 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - query = requireDir('../query'), - analyze = require('../lib/analyze'); +const Database = require('better-sqlite3'); +const query = { extract: require('../query/extract') }; +const analyze = require('../lib/analyze'); // export setup method function setup( addressDbPath, streetDbPath ){ // connect to db - var db = new Database( addressDbPath, { - readonly: true, - verbose: console.log - }); + const db = new Database(addressDbPath, { readonly: true }); // attach street database db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); diff --git a/api/near.js b/api/near.js index aa5f0e1f..0baea8ee 100644 --- a/api/near.js +++ b/api/near.js @@ -1,24 +1,18 @@ - -var Database = require('better-sqlite3'), - polyline = require('@mapbox/polyline'), - requireDir = require('require-dir'), - query = requireDir('../query'), - project = require('../lib/project'), - proximity = require('../lib/proximity'); +const Database = require('better-sqlite3'); +const polyline = require('@mapbox/polyline'); +const query = { near: require('../query/near') }; +const project = require('../lib/project'); +const proximity = require('../lib/proximity'); // polyline precision -var PRECISION = 6; +const PRECISION = 6; // export setup method function setup( streetDbPath ){ // connect to db // @todo: this is required as the query uses the 'street.' 
prefix for tables - var db = new Database( '/tmp/path', { - memory: true, - readonly: false, - verbose: console.log - }); + const db = new Database('/tmp/path', { memory: true }); // attach street database db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); diff --git a/api/oa.js b/api/oa.js index 2f42b13d..a8116aeb 100644 --- a/api/oa.js +++ b/api/oa.js @@ -1,16 +1,13 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - stream = requireDir('../stream', { recurse: true }), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const stream = requireDir('../stream', { recurse: true }); +const query = requireDir('../query'); // export method function oa(dataStream, addressDbPath, streetDbPath, done){ // connect to db - var db = new Database( addressDbPath, { - verbose: console.log - }); + const db = new Database(addressDbPath); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created diff --git a/api/osm.js b/api/osm.js index c3aabd91..63f987c1 100644 --- a/api/osm.js +++ b/api/osm.js @@ -1,16 +1,13 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - stream = requireDir('../stream', { recurse: true }), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const stream = requireDir('../stream', { recurse: true }); +const query = requireDir('../query'); // export method function osm(dataStream, addressDbPath, streetDbPath, done){ // connect to db - var db = new Database( addressDbPath, { - verbose: console.log - }); + const db = new Database(addressDbPath); query.configure(db); // configure database query.tables.address(db); // create tables only if not already created diff --git a/api/polyline.js b/api/polyline.js index 26b863ec..d95f155e 100644 --- a/api/polyline.js +++ b/api/polyline.js @@ -1,16 +1,13 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - stream = requireDir('../stream', { recurse: true }), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const stream = requireDir('../stream', { recurse: true }); +const query = requireDir('../query'); // export method function polyline(dataStream, streetDbPath, done){ // connect to db - var db = new Database(streetDbPath, { - verbose: console.log - }); + const db = new Database(streetDbPath); query.configure(db); // configure database query.tables.street(db, true); // reset database and create tables diff --git a/api/search.js b/api/search.js index 85d27e33..dfbfb829 100644 --- a/api/search.js +++ b/api/search.js @@ -1,19 +1,15 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - query = requireDir('../query'), - project = require('../lib/project'), - geodesic = require('../lib/geodesic'), - analyze = require('../lib/analyze'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const query = requireDir('../query'); +const project = require('../lib/project'); +const geodesic = require('../lib/geodesic'); +const analyze = require('../lib/analyze'); // export setup method function setup( addressDbPath, streetDbPath ){ // connect to db - var db = new Database( addressDbPath, { - readonly: true, - verbose: console.log - }); + const db = new Database(addressDbPath, {readonly: true}); // attach 
street database db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); @@ -136,10 +132,10 @@ function setup( addressDbPath, streetDbPath ){ // if distance = 0 then we can simply use either A or B (they are the same lat/lon) // else we interpolate between the two positions - var point2 = A; + var interpolatedPoint = A; if( distance > 0 ){ var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber)); - point2 = geodesic.interpolate( distance, ratio, A, B ); + interpolatedPoint = geodesic.interpolate( distance, ratio, A, B ); } // return interpolated address @@ -147,8 +143,8 @@ function setup( addressDbPath, streetDbPath ){ type: 'interpolated', source: 'mixed', number: '' + Math.floor( normalized.number ), - lat: parseFloat( project.toDeg( point2.lat ).toFixed(7) ), - lon: parseFloat( project.toDeg( point2.lon ).toFixed(7) ) + lat: parseFloat( project.toDeg( interpolatedPoint.lat ).toFixed(7) ), + lon: parseFloat( project.toDeg( interpolatedPoint.lon ).toFixed(7) ) }); } catch (err) { // an error occurred diff --git a/api/street.js b/api/street.js index e709655d..48d75ee6 100644 --- a/api/street.js +++ b/api/street.js @@ -1,16 +1,12 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const query = requireDir('../query'); // export setup method function setup( streetDbPath ){ // connect to db - var db = new Database( streetDbPath, { - readonly: true, - verbose: console.log - }); + const db = new Database(streetDbPath, { readonly: true }); // query method var q = function( ids, cb ){ diff --git a/api/tiger.js b/api/tiger.js index cd16a222..42f83550 100644 --- a/api/tiger.js +++ b/api/tiger.js @@ -1,8 +1,7 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - stream = requireDir('../stream', { recurse: true }), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const stream = requireDir('../stream', { recurse: true }); +const query = requireDir('../query'); // export method function tiger(dataStream, addressDbPath, streetDbPath, done){ diff --git a/api/vertices.js b/api/vertices.js index 3755a5db..a1a10a5e 100644 --- a/api/vertices.js +++ b/api/vertices.js @@ -1,8 +1,7 @@ - -var Database = require('better-sqlite3'), - requireDir = require('require-dir'), - stream = requireDir('../stream', { recurse: true }), - query = requireDir('../query'); +const Database = require('better-sqlite3'); +const requireDir = require('require-dir'); +const stream = requireDir('../stream', { recurse: true }); +const query = requireDir('../query'); // export method function vertices(addressDbPath, streetDbPath, done){ diff --git a/cmd/extract.js b/cmd/extract.js index 7e0dd9b1..8af25e9f 100644 --- a/cmd/extract.js +++ b/cmd/extract.js @@ -1,6 +1,5 @@ - -var extract = require('../api/extract'), - pretty = require('../lib/pretty'); +const extract = require('../api/extract'); +const pretty = require('../lib/pretty'); // help text if( process.argv.length !== 7 ){ diff --git a/cmd/oa.js b/cmd/oa.js index faf80429..5eaca62d 100644 --- a/cmd/oa.js +++ b/cmd/oa.js @@ -1,6 +1,4 @@ - -var tty = require('tty'), - oa = require('../api/oa'); +const oa = require('../api/oa'); // help text if( process.argv.length !== 4 ){ @@ -10,7 +8,7 @@ if( process.argv.length !== 4 ){ process.exit(1); } -if( tty.isatty( process.stdin ) ){ +if( 
process.stdin.isTTY ){ console.error('no data piped to stdin'); process.exit(1); } diff --git a/cmd/osm.js b/cmd/osm.js index 62959c33..5dfe1846 100644 --- a/cmd/osm.js +++ b/cmd/osm.js @@ -1,6 +1,4 @@ - -var tty = require('tty'), - osm = require('../api/osm'); +const osm = require('../api/osm'); // help text if( process.argv.length !== 4 ){ @@ -10,7 +8,7 @@ if( process.argv.length !== 4 ){ process.exit(1); } -if( tty.isatty( process.stdin ) ){ +if( process.stdin.isTTY ){ console.error('no data piped to stdin'); process.exit(1); } diff --git a/cmd/polyline.js b/cmd/polyline.js index 524627b4..4fc612f3 100644 --- a/cmd/polyline.js +++ b/cmd/polyline.js @@ -1,6 +1,4 @@ - -var tty = require('tty'), - polyline = require('../api/polyline'); +const polyline = require('../api/polyline'); // help text if( process.argv.length < 3 ){ @@ -10,7 +8,7 @@ if( process.argv.length < 3 ){ process.exit(1); } -if( tty.isatty( process.stdin ) ){ +if( process.stdin.isTTY ){ console.error('no data piped to stdin'); process.exit(1); } diff --git a/cmd/search.js b/cmd/search.js index 5a08ba7c..62af6723 100644 --- a/cmd/search.js +++ b/cmd/search.js @@ -1,5 +1,4 @@ - -var search = require('../api/search'); +const search = require('../api/search'); // help text if( process.argv.length < 8 || process.argv.length > 9 ){ @@ -9,11 +8,11 @@ if( process.argv.length < 8 || process.argv.length > 9 ){ process.exit(1); } -var conn = search( process.argv[2], process.argv[3] ); -var number = process.argv[6]; -var street = process.argv[7]; +const conn = search( process.argv[2], process.argv[3] ); +const number = process.argv[6]; +const street = process.argv[7]; -var point = { +const point = { lat: parseFloat( process.argv[4] ), lon: parseFloat( process.argv[5] ) }; diff --git a/cmd/server.js b/cmd/server.js index 9fd6c1b9..5ef42629 100644 --- a/cmd/server.js +++ b/cmd/server.js @@ -1,15 +1,11 @@ - -var express = require('express'), - directory = require('serve-index'), - polyline = require('@mapbox/polyline'), - search = require('../api/search'), - extract = require('../api/extract'), - street = require('../api/street'), - near = require('../api/near'), - pretty = require('../lib/pretty'), - analyze = require('../lib/analyze'), - project = require('../lib/project'), - proximity = require('../lib/proximity'); +const express = require('express'); +const polyline = require('@mapbox/polyline'); +const search = require('../api/search'); +const extract = require('../api/extract'); +const street = require('../api/street'); +const near = require('../api/near'); +const pretty = require('../lib/pretty'); +const analyze = require('../lib/analyze'); const morgan = require( 'morgan' ); const logger = require('pelias-logger').get('interpolation'); @@ -17,7 +13,7 @@ const through = require( 'through2' ); const _ = require('lodash'); // optionally override port using env var -var PORT = process.env.PORT || 3000; +const PORT = process.env.PORT || 3000; // help text if( process.argv.length !== 4 ){ @@ -27,10 +23,10 @@ if( process.argv.length !== 4 ){ process.exit(1); } -var app = express(); +const app = express(); app.use(log()); -var conn = { +const conn = { search: search( process.argv[2], process.argv[3] ), extract: extract( process.argv[2], process.argv[3] ), street: street( process.argv[3] ), diff --git a/cmd/tiger.js b/cmd/tiger.js index 376277e0..ed32e952 100644 --- a/cmd/tiger.js +++ b/cmd/tiger.js @@ -1,6 +1,4 @@ - -var tty = require('tty'), - tiger = require('../api/tiger'); +const tiger = require('../api/tiger'); // help text if( 
process.argv.length !== 4 ){ @@ -10,7 +8,7 @@ if( process.argv.length !== 4 ){ process.exit(1); } -if( tty.isatty( process.stdin ) ){ +if( process.stdin.isTTY ){ console.error('no data piped to stdin'); process.exit(1); } diff --git a/cmd/vertices.js b/cmd/vertices.js index db29064b..fef86cd5 100644 --- a/cmd/vertices.js +++ b/cmd/vertices.js @@ -1,5 +1,4 @@ - -var vertices = require('../api/vertices'); +const vertices = require('../api/vertices'); // help text if( process.argv.length !== 4 ){ diff --git a/query/configure.js b/query/configure.js index f32d8f04..9603e2f9 100644 --- a/query/configure.js +++ b/query/configure.js @@ -1,7 +1,7 @@ // @see: http://sqlite.org/pragma.html -module.exports = function( db ){ +module.exports = ( db ) => { db.exec('PRAGMA main.foreign_keys=OFF;'); // we don't enforce foreign key constraints db.exec('PRAGMA main.page_size=4096;'); // (default: 1024) db.exec('PRAGMA main.cache_size=-2000;'); // (default: -2000, 2GB) diff --git a/query/extract.js b/query/extract.js index e62246a0..f4953110 100644 --- a/query/extract.js +++ b/query/extract.js @@ -1,3 +1,5 @@ +const _ = require('lodash'); + // maximum names to match on const MAX_NAMES = 10; @@ -20,21 +22,18 @@ const SQL = ` // SQL prepared statements dont easily support variable length inputs. // This function dynamically generates a SQL query based on the number // of 'name' conditions required. -function generateDynamicSQL(max) { - const conditions = new Array(max.names) - .fill('(street.names.name=$name)') - .map((sql, pos) => sql.replace('$name', `$name${pos}`)); - +function generateDynamicSQL(nameCount) { + const conditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); } // Reusing prepared statements can have a ~10% perf benefit // Note: the cache is global and so must be unique per database. 
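// e.g. (illustrative, not part of this patch) with better-sqlite3 the `db.name`
// property is the path used to open the connection, so a cached statement for two
// name conditions against '/data/address.db' would live under the key '2:/data/address.db'.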
const cache = []; -function statementCache(db, max) { - const key = `${max.names}:${db.name}`; +function statementCache(db, nameCount) { + const key = `${nameCount}:${db.name}`; if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(max)); + cache[key] = db.prepare(generateDynamicSQL(nameCount)); } return cache[key]; } @@ -46,11 +45,11 @@ module.exports = function( db, point, names ){ return []; } - // max conditions to search on - const max = { names: Math.min( names.length, MAX_NAMES ) }; + // total amount of names to consider for search + const nameCount = Math.min( names.length, MAX_NAMES ); // use a prepared statement from cache (or generate one if not yet cached) - const stmt = statementCache(db, max); + const stmt = statementCache(db, nameCount); // query params const params = { @@ -59,7 +58,7 @@ module.exports = function( db, point, names ){ }; // each name is added in the format: $name0=x, $name1=y - names.slice(0, max.names).forEach((name, pos) => { + names.slice(0, nameCount).forEach((name, pos) => { params[`name${pos}`] = name; }); diff --git a/query/indexes.js b/query/indexes.js index 51d5d05a..67c43881 100644 --- a/query/indexes.js +++ b/query/indexes.js @@ -1,19 +1,10 @@ - -module.exports.street = function( db ){ +module.exports.street = ( db ) => { // names - db.exec('CREATE INDEX IF NOT EXISTS names_id_idx ON names(id);'); - db.exec('CREATE INDEX IF NOT EXISTS names_name_idx ON names(name, id);'); + db.exec(`CREATE INDEX IF NOT EXISTS names_id_idx ON names(id)`); + db.exec(`CREATE INDEX IF NOT EXISTS names_name_idx ON names(name, id)`); }; -module.exports.address = function( db ){ +module.exports.address = ( db ) => { // create an index on street id - db.exec('CREATE INDEX IF NOT EXISTS address_id_idx ON address(id);'); - - // create an unique index on housenumber, this ensures we only store copy of each - // db.exec('CREATE UNIQUE INDEX IF NOT EXISTS housenumber_uniq_idx ON address(housenumber);'); - - // these indices are not strictly required and consume a large amount of disk space - // db.exec('CREATE INDEX IF NOT EXISTS address_source_idx ON address(source);'); - // db.exec('CREATE INDEX IF NOT EXISTS address_parity_idx ON address(parity);'); - // db.exec('CREATE INDEX IF NOT EXISTS address_housenumber_idx ON address(housenumber);'); + db.exec(`CREATE INDEX IF NOT EXISTS address_id_idx ON address(id)`); }; diff --git a/query/lookup.js b/query/lookup.js index 68e40415..9185868b 100644 --- a/query/lookup.js +++ b/query/lookup.js @@ -1,3 +1,5 @@ +const _ = require('lodash'); + // maximum names to match on const MAX_NAMES = 10; @@ -16,11 +18,31 @@ const SQL = ` LIMIT ${MAX_MATCHES} `; -const POINT_SQL = '(street.rtree.minX<$lon AND street.rtree.maxX>$lon AND street.rtree.minY<$lat AND street.rtree.maxY>$lat)'; -const NAME_SQL = '(street.names.name=$name)'; - -// sqlite3 prepared statements -var stmt = {}; +// SQL prepared statements dont easily support variable length inputs. +// This function dynamically generates a SQL query based on the number +// of 'name' and 'point' conditions required. 
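// e.g. (illustrative) for nameCount=2 and pointCount=1 the placeholders expand to:
//   %%NAME_CONDITIONS%%  -> (street.names.name=$name0) OR (street.names.name=$name1)
//   %%POINT_CONDITIONS%% -> ( street.rtree.minX<$point0x AND street.rtree.maxX>$point0x AND
//                             street.rtree.minY<$point0y AND street.rtree.maxY>$point0y )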
+function generateDynamicSQL(pointCount, nameCount) { + const nameConditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); + const pointConditions = _.times(pointCount, (i) => `( + street.rtree.minX<$point${i}x AND street.rtree.maxX>$point${i}x AND + street.rtree.minY<$point${i}y AND street.rtree.maxY>$point${i}y + )`); + + return SQL + .replace('%%POINT_CONDITIONS%%', pointConditions.join(' OR ')) + .replace('%%NAME_CONDITIONS%%', nameConditions.join(' OR ')); +} + +// Reusing prepared statements can have a ~10% perf benefit +// Note: the cache is global and so must be unique per database. +const cache = []; +function statementCache(db, pointCount, nameCount) { + const key = `${nameCount}:${pointCount}:${db.name}`; + if (!cache[key]) { + cache[key] = db.prepare(generateDynamicSQL(pointCount, nameCount)); + } + return cache[key]; +} module.exports = function( db, names, points ){ @@ -29,51 +51,27 @@ module.exports = function( db, names, points ){ return []; } - // max conditions to search on - var max = { - names: Math.min( names.length, MAX_NAMES ), - points: Math.min( points.length, MAX_POINTS ) - }; - - // give this statement a unique key - var hash = '' + max.names + '|' + max.points; - - // create prepared statement if one doesn't exist - if( !stmt.hasOwnProperty( hash ) ){ - - // add point confitions to query - var pointConditions = Array.apply(null, new Array(max.points)).map(function(__, i){ - return POINT_SQL.replace(/\$lon/g, `$point${i}x`) - .replace(/\$lat/g, `$point${i}y`); - }); - - // add name conditions to query - var nameConditions = Array.apply(null, new Array(max.names)).map(function(__, i){ - return NAME_SQL.replace('$name', `$name${i}`); - }); - - // build unique sql statement - var sql = SQL.replace( '%%NAME_CONDITIONS%%', nameConditions.join(' OR ') ) - .replace( '%%POINT_CONDITIONS%%', pointConditions.join(' OR ') ); + // total amount of names/points to consider for search + const nameCount = Math.min(names.length, MAX_NAMES); + const pointCount = Math.min(points.length, MAX_POINTS); - // create new prepared statement - stmt[hash] = db.prepare( sql ); - } + // use a prepared statement from cache (or generate one if not yet cached) + const stmt = statementCache(db, pointCount, nameCount); - // create a variable array of args to bind to query - var args = {}; + // create a variable array of params to bind to query + var params = {}; // add points - points.slice(0, max.points).forEach( function( point, i ){ - args[`point${i}x`] = point.lon; - args[`point${i}y`] = point.lat; + points.slice(0, pointCount).forEach((point, i) => { + params[`point${i}x`] = point.lon; + params[`point${i}y`] = point.lat; }); - // add names and callback - names.slice(0, max.names).forEach(( name, i ) => { - args[`name${i}`] = name; + // add names + names.slice(0, nameCount).forEach((name, i) => { + params[`name${i}`] = name; }); // execute statement - return stmt[hash].all(args); + return stmt.all(params); }; diff --git a/query/near.js b/query/near.js index 1bfd2666..56be9b8b 100644 --- a/query/near.js +++ b/query/near.js @@ -1,4 +1,3 @@ - /** find all streets which have a bbox which envelops the specified point; regardless of their names. 
**/ @@ -6,27 +5,26 @@ // maximum street segments to return var MAX_MATCHES = 100; -var SQL = [ - 'SELECT street.polyline.id, street.polyline.line, street.names.name FROM street.polyline', - 'JOIN street.rtree ON street.rtree.id = street.polyline.id', - 'JOIN street.names ON street.names.id = street.polyline.id', - 'WHERE (street.rtree.minX<$LON AND street.rtree.maxX>$LON AND street.rtree.minY<$LAT AND street.rtree.maxY>$LAT)', - 'GROUP BY street.polyline.id', - 'LIMIT $LIMIT;' -].join(' '); +const SQL = ` + SELECT street.polyline.id, street.polyline.line, street.names.name FROM street.polyline + JOIN street.rtree ON street.rtree.id = street.polyline.id + JOIN street.names ON street.names.id = street.polyline.id + WHERE (street.rtree.minX<$lon AND street.rtree.maxX>$lon AND street.rtree.minY<$lat AND street.rtree.maxY>$lat) + GROUP BY street.polyline.id + LIMIT ${MAX_MATCHES} +`; -// sqlite3 prepared statements +// prepared statement cache var stmt; -module.exports = function( db, point ){ +module.exports = ( db, point ) => { // create prepared statement if one doesn't exist if( !stmt ){ stmt = db.prepare( SQL ); } // execute statement return stmt.all({ - LON: point.lon, - LAT: point.lat, - LIMIT: MAX_MATCHES + lon: point.lon, + lat: point.lat }); }; diff --git a/query/search.js b/query/search.js index 155cacd2..c722d2d9 100644 --- a/query/search.js +++ b/query/search.js @@ -1,3 +1,5 @@ +const _ = require('lodash'); + // maximum names to match on const MAX_NAMES = 10; @@ -10,62 +12,62 @@ const MAX_MATCHES = 20; exact match was found or not. **/ -const SQL = [ - 'WITH base AS (', - 'SELECT id, housenumber, rowid', - 'FROM address', - 'WHERE id IN (', - 'SELECT id', - 'FROM street.names', - 'WHERE id IN (', - 'SELECT id', - 'FROM street.rtree', - 'WHERE (', - 'street.rtree.minX<=$lon AND street.rtree.maxX>=$lon AND', - 'street.rtree.minY<=$lat AND street.rtree.maxY>=$lat', - ')', - ')', - 'AND ( %%NAME_CONDITIONS%% )', - ')', - ')', - 'SELECT * FROM address', - 'WHERE rowid IN (', - 'SELECT rowid FROM (', - 'SELECT * FROM base', - 'WHERE housenumber < $housenumber', - 'GROUP BY id HAVING( MAX( housenumber ) )', - 'ORDER BY housenumber DESC', - ')', - 'UNION', - 'SELECT rowid FROM (', - 'SELECT * FROM base', - 'WHERE housenumber >= $housenumber', - 'GROUP BY id HAVING( MIN( housenumber ) )', - 'ORDER BY housenumber ASC', - ')', - ')', - 'ORDER BY housenumber ASC', // @warning business logic depends on this - `LIMIT ${MAX_MATCHES};` -].join(' '); +// @note: window functions were introduced to sqlite since this SQL was +// originally written, it may be possible to simplify the SQL using them. 
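// (untested sketch, not part of this patch) a window function could select the nearest
// housenumber on either side of $housenumber in a single pass, e.g. keeping rows where:
//   ROW_NUMBER() OVER (
//     PARTITION BY id, (housenumber >= $housenumber)
//     ORDER BY ABS(housenumber - $housenumber)
//   ) = 1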
+// @see: https://sqlite.org/windowfunctions.html +const SQL = ` + WITH base AS ( + SELECT id, housenumber, rowid + FROM address + WHERE id IN ( + SELECT id + FROM street.names + WHERE id IN ( + SELECT id + FROM street.rtree + WHERE ( + street.rtree.minX<=$lon AND street.rtree.maxX>=$lon AND + street.rtree.minY<=$lat AND street.rtree.maxY>=$lat + ) + ) + AND ( %%NAME_CONDITIONS%% ) + ) + ) + SELECT * FROM address + WHERE rowid IN ( + SELECT rowid FROM ( + SELECT * FROM base + WHERE housenumber < $housenumber + GROUP BY id HAVING( MAX( housenumber ) ) + ORDER BY housenumber DESC + ) + UNION + SELECT rowid FROM ( + SELECT * FROM base + WHERE housenumber >= $housenumber + GROUP BY id HAVING( MIN( housenumber ) ) + ORDER BY housenumber ASC + ) + ) + ORDER BY housenumber ASC -- @warning business logic depends on this + LIMIT ${MAX_MATCHES} +`; // SQL prepared statements dont easily support variable length inputs. // This function dynamically generates a SQL query based on the number // of 'name' conditions required. -function generateDynamicSQL(max){ - const conditions = new Array(max.names) - .fill('(street.names.name=$name)') - .map((sql, pos) => sql.replace('$name', `$name${pos}`)); - +function generateDynamicSQL(nameCount){ + const conditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); } // Reusing prepared statements can have a ~10% perf benefit // Note: the cache is global and so must be unique per database. const cache = []; -function statementCache(db, max){ - const key = `${max.names}:${db.name}`; +function statementCache(db, nameCount){ + const key = `${nameCount}:${db.name}`; if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(max)); + cache[key] = db.prepare(generateDynamicSQL(nameCount)); } return cache[key]; } @@ -76,11 +78,11 @@ module.exports = function( db, point, number, names ){ return []; } - // max conditions to search on - const max = { names: Math.min( names.length, MAX_NAMES ) }; + // total amount of names to consider for search + const nameCount = Math.min( names.length, MAX_NAMES ); // use a prepared statement from cache (or generate one if not yet cached) - const stmt = statementCache(db, max); + const stmt = statementCache(db, nameCount); // query params const params = { @@ -90,7 +92,7 @@ module.exports = function( db, point, number, names ){ }; // each name is added in the format: $name0=x, $name1=y - names.slice(0, max.names).forEach((name, pos) => { + names.slice(0, nameCount).forEach((name, pos) => { params[`name${pos}`] = name; }); diff --git a/query/street.js b/query/street.js index 9114462e..5a9fcc4c 100644 --- a/query/street.js +++ b/query/street.js @@ -1,3 +1,5 @@ +const _ = require('lodash'); + const SQL = ` SELECT * FROM polyline JOIN names ON polyline.id = names.id @@ -8,36 +10,25 @@ const SQL = ` // SQL prepared statements dont easily support variable length inputs. // This function dynamically generates a SQL query based on the number // of 'id' conditions required. -function generateDynamicSQL(max) { - const conditions = new Array(max.ids) - .fill('$id') - .map((sql, pos) => sql.replace('$id', `$id${pos}`)); - +function generateDynamicSQL(idCount) { + const conditions = _.times(idCount, (i) => `?`); return SQL.replace('%%IDS%%', conditions.join(',')); } // Reusing prepared statements can have a ~10% perf benefit // Note: the cache is global and so must be unique per database. 
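// Note also that a prepared statement is bound to the Database instance which created
// it, so statements cannot be shared between connections; hence the per-database key.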
const cache = []; -function statementCache(db, max) { - const key = `${max.ids}:${db.name}`; +function statementCache(db, idCount) { + const key = `${idCount}:${db.name}`; if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(max)); + cache[key] = db.prepare(generateDynamicSQL(idCount)); } return cache[key]; } module.exports = function( db, ids ){ - const stmt = statementCache(db, { ids: ids.length }); - - // query params - const params = {}; - - // each name is added in the format: $id0=x, $id1=y - ids.forEach((id, pos) => { - params[`id${pos}`] = id; - }); + const stmt = statementCache(db, ids.length); // execute statement - return stmt.all(params); + return stmt.all(ids); }; diff --git a/query/tables.js b/query/tables.js index 065e95d2..bafa2425 100644 --- a/query/tables.js +++ b/query/tables.js @@ -1,48 +1,38 @@ - -module.exports.street = function( db, rebuild ){ +module.exports.street = ( db, rebuild ) => { // create rtree table - if( rebuild ){ db.exec('DROP TABLE IF EXISTS rtree;'); } - db.exec([ - 'CREATE VIRTUAL TABLE IF NOT EXISTS rtree', - 'USING rtree(id, minX, maxX, minY, maxY);' - ].join(' ')); + if( rebuild ){ db.exec(`DROP TABLE IF EXISTS rtree`); } + db.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS rtree + USING rtree( + id, minX, maxX, minY, maxY + )` + ); // create names table - if( rebuild ){ db.exec('DROP TABLE IF EXISTS names;'); } - db.exec([ - 'CREATE TABLE IF NOT EXISTS names', - '(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT);' - ].join(' ')); - - // create fts table - // if( rebuild ){ db.exec('DROP TABLE IF EXISTS names;'); } - // db.exec([ - // 'CREATE VIRTUAL TABLE IF NOT EXISTS names', - // 'USING fts4(rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT, notindexed=id, tokenize=simple);' - // ].join(' ')); + if( rebuild ){ db.exec(`DROP TABLE IF EXISTS names`); } + db.exec(` + CREATE TABLE IF NOT EXISTS names ( + rowid INTEGER PRIMARY KEY, id INTEGER, name TEXT + )` + ); // create polyline table - if( rebuild ){ db.exec('DROP TABLE IF EXISTS polyline;'); } - db.exec([ - 'CREATE TABLE IF NOT EXISTS polyline', - '(id INTEGER PRIMARY KEY, line TEXT);' - ].join(' ')); - - // create geometry table - // if( rebuild ){ db.exec('DROP TABLE IF EXISTS geometry;'); } - // db.exec('CREATE TABLE IF NOT EXISTS geometry (id INTEGER PRIMARY KEY);'); - // if( rebuild ){ db.exec('SELECT AddGeometryColumn('geometry', 'geometry', 4326, 'LINESTRING', 'xy', 1);'); } + if( rebuild ){ db.exec(`DROP TABLE IF EXISTS polyline`); } + db.exec(` + CREATE TABLE IF NOT EXISTS polyline ( + id INTEGER PRIMARY KEY, line TEXT + )` + ); }; -module.exports.address = function( db, rebuild ){ +module.exports.address = ( db, rebuild ) => { // create address table - if( rebuild ){ db.exec('DROP TABLE IF EXISTS address;'); } - db.exec([ - 'CREATE TABLE IF NOT EXISTS address', - '(', - 'rowid INTEGER PRIMARY KEY, id INTEGER, source TEXT, source_id TEXT, housenumber REAL,', - 'lat REAL, lon REAL, parity TEXT, proj_lat REAL, proj_lon REAL,', - 'UNIQUE( id, housenumber ) ON CONFLICT IGNORE', - ');' - ].join(' ')); + if( rebuild ){ db.exec(`DROP TABLE IF EXISTS address`); } + db.exec(` + CREATE TABLE IF NOT EXISTS address ( + rowid INTEGER PRIMARY KEY, id INTEGER, source TEXT, source_id TEXT, housenumber REAL, + lat REAL, lon REAL, parity TEXT, proj_lat REAL, proj_lon REAL, + UNIQUE( id, housenumber ) ON CONFLICT IGNORE + )` + ); }; diff --git a/stream/address/import.js b/stream/address/import.js index 8db512b2..0865ea3d 100644 --- a/stream/address/import.js +++ b/stream/address/import.js @@ -1,6 
+1,5 @@ - -var through = require('through2'), - Statistics = require('../../lib/statistics'); +const through = require('through2'); +const Statistics = require('../../lib/statistics'); function streamFactory(db, done){ @@ -17,7 +16,7 @@ function streamFactory(db, done){ stats.tick(); // create a new stream - return through.obj({ highWaterMark: 2 }, function( batch, _, next ){ + return through.obj(function( batch, _, next ){ // start transaction db.transaction(() => { @@ -28,7 +27,7 @@ function streamFactory(db, done){ // insert points in address table stmt.address.run(address); }); - })(); + }).deferred(); // update statistics stats.inc( batch.length ); diff --git a/stream/address/lookup.js b/stream/address/lookup.js index aee3acc4..45400803 100644 --- a/stream/address/lookup.js +++ b/stream/address/lookup.js @@ -41,8 +41,6 @@ function streamFactory(db){ var points = selectPoints( batch ); try { - - // console.error( points ); let rows = query.lookup(db, names, points ); // no results found diff --git a/stream/street/augment.js b/stream/street/augment.js index 1032bcde..84564222 100644 --- a/stream/street/augment.js +++ b/stream/street/augment.js @@ -1,11 +1,10 @@ - -var through = require('through2'), - analyze = require('../../lib/analyze'); +const through = require('through2'); +const analyze = require('../../lib/analyze'); // increase/decrease bbox bounds by this much in order to find houses which // might be slighly outside the bounds. // eg: http://geojson.io/#id=gist:anonymous/ce8b0cdd2ba83ef24cfaab49d36d8cdd&map=15/52.5011/13.3222 -var FUDGE_FACTOR = 0.005; +const FUDGE_FACTOR = 0.005; /** this stream augments the parsed data with additional fields. @@ -14,38 +13,37 @@ var FUDGE_FACTOR = 0.005; - perform libpostal normalization - apply 'fudge factor' to bbox **/ -function streamFactory(){ - return through.obj(function( street, _, next ){ - - // normalize all names - var names = []; - street.getNames().forEach( function( name ){ - names = names.concat( analyze.street( name ) ); - }); - - // if the source file contains no valid names for this polyline - if( !names.length ){ - console.error( 'street has no valid names, check your 0sv file:' ); - console.error( street.getEncodedPolyline() ); - return next(); - } - - street.setNames( names ); - - // expand bbox - var bbox = street.getBbox(); - street.setBbox({ - minX: bbox.minX -FUDGE_FACTOR, - minY: bbox.minY -FUDGE_FACTOR, - maxX: bbox.maxX +FUDGE_FACTOR, - maxY: bbox.maxY +FUDGE_FACTOR - }); - - // push augmented data downstream - this.push( street ); - - next(); + +// the transform function is executed once per batch in the stream. 
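// e.g. (per the analyze tests and libpostal fixtures further down) analyze.street('grolmanstraße')
// returns ['grolmanstraße', 'grolman straße'], so a single input name can yield several
// normalized names for the same street.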
+const transform = (street, _, next) => { + + // normalize all names + let names = []; + street.getNames().forEach(function (name) { + names = names.concat(analyze.street(name)); }); -} -module.exports = streamFactory; + // if the source file contains no valid names for this polyline + if (!names.length) { + console.error('street has no valid names, check your 0sv file:'); + console.error(street.getEncodedPolyline()); + return next(); + } + + street.setNames(names); + + // expand bbox + const bbox = street.getBbox(); + street.setBbox({ + minX: bbox.minX - FUDGE_FACTOR, + minY: bbox.minY - FUDGE_FACTOR, + maxX: bbox.maxX + FUDGE_FACTOR, + maxY: bbox.maxY + FUDGE_FACTOR + }); + + // push augmented data downstream + next(null, street); +}; + +// export a function which returns a new stream +module.exports = () => through.obj(transform); diff --git a/stream/street/import.js b/stream/street/import.js index e4652113..3588983c 100644 --- a/stream/street/import.js +++ b/stream/street/import.js @@ -1,70 +1,78 @@ - const through = require('through2'); const Statistics = require('../../lib/statistics'); function streamFactory(db, done) { // sqlite3 prepared statements - var stmt = { - rtree: db.prepare('INSERT INTO rtree (id, minX, maxX, minY, maxY) VALUES ($id, $minX, $maxX, $minY, $maxY);'), - names: db.prepare('INSERT INTO names (rowid, id, name) VALUES (NULL, $id, $name);'), - line: db.prepare('INSERT INTO polyline (id, line) VALUES ($id, $line);') + const stmt = { + rtree: db.prepare(`INSERT INTO rtree (id, minX, maxX, minY, maxY) VALUES ($id, $minX, $maxX, $minY, $maxY)`), + names: db.prepare(`INSERT INTO names (rowid, id, name) VALUES (NULL, $id, $name)`), + line: db.prepare(`INSERT INTO polyline (id, line) VALUES ($id, $line)`) }; // tick import stats - var stats = new Statistics(); + const stats = new Statistics(); stats.tick(); - // create a new stream - return through.obj(function (batch, _, next) { - - // start transactio - db.transaction(() => { + // the insert function imports data from each batch + // into the database. + const insert = (batch) => { + batch.forEach((street) => { - // import batch - batch.forEach(function (street) { - - // insert names in to lookup table - street.getNames().forEach(function (name) { - stmt.names.run({ - id: street.getId(), - name: name - }); - }); - - // insert bbox in to rtree table - var bbox = street.getBbox(); - stmt.rtree.run({ + // insert names in to lookup table + street.getNames().forEach((name) => { + stmt.names.run({ id: street.getId(), - minX: bbox.minX, - maxX: bbox.maxX, - minY: bbox.minY, - maxY: bbox.maxY + name: name }); + }); - // insert line in to polyline table - stmt.line.run({ - id: street.getId(), - line: street.getEncodedPolyline() - }); + // insert bbox in to rtree table + const bbox = street.getBbox(); + stmt.rtree.run({ + id: street.getId(), + minX: bbox.minX, + maxX: bbox.maxX, + minY: bbox.minY, + maxY: bbox.maxY + }); + // insert line in to polyline table + stmt.line.run({ + id: street.getId(), + line: street.getEncodedPolyline() }); - }).deferred(); + }); + }; + + // the transform function is executed once per batch in the stream. + const transform = (batch, encoding, next) => { + + // execute transaction + db.transaction(insert).deferred(batch); // update statistics stats.inc(batch.length); - // wait for transaction to complete before continuing + // ready for more data next(); + }; - }, function (next) { + // the flush function is executed once at the end of the stream. 
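// (through2 invokes the flush callback once, after the final transform call has
// completed and before the stream emits 'end')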
+ const flush = (next) => { // stop stats ticker stats.tick(false); + // call streamFactory callback to indicate the stream is complete. done(); + + // indicate the stream has ended and all work has been complete. next(); - }); + }; + + // create a new stream + return through.obj(transform, flush); } module.exports = streamFactory; diff --git a/stream/vertices/augment.js b/stream/vertices/augment.js index e814f7d8..f041c743 100644 --- a/stream/vertices/augment.js +++ b/stream/vertices/augment.js @@ -2,7 +2,6 @@ const _ = require('lodash'); var through = require('through2'), polyline = require('@mapbox/polyline'), project = require('../../lib/project'), - analyze = require('../../lib/analyze'), interpolate = require('../../lib/interpolate'); // polyline precision From 528fb83e2b5f38befd168ce34d9ebddfd7ae95b0 Mon Sep 17 00:00:00 2001 From: missinglink Date: Mon, 16 Mar 2020 13:51:00 +0100 Subject: [PATCH 3/8] feat(sqlite): move query caching logic to class --- query/DynamicQueryCache.js | 76 ++++++++++++++++ query/extract.js | 26 +----- query/lookup.js | 37 ++------ query/near.js | 9 +- query/search.js | 24 +---- query/street.js | 34 ++----- test/_unit.js | 3 +- test/query/DynamicQueryCache.js | 156 ++++++++++++++++++++++++++++++++ 8 files changed, 263 insertions(+), 102 deletions(-) create mode 100644 query/DynamicQueryCache.js create mode 100644 test/query/DynamicQueryCache.js diff --git a/query/DynamicQueryCache.js b/query/DynamicQueryCache.js new file mode 100644 index 00000000..f4c4f9f5 --- /dev/null +++ b/query/DynamicQueryCache.js @@ -0,0 +1,76 @@ +const _ = require('lodash'); + +const validateBaseQuery = (query) => { + if (!_.isString(query) || _.isEmpty(query)) { + throw new Error('invalid base query'); + } +}; + +const validatePlaceholder = (placeholder, sql) => { + if (!_.isString(placeholder) || _.isEmpty(placeholder)) { + throw new Error('invalid placeholder'); + } + if (!sql.includes(placeholder)) { + throw new Error('sql does not contain placeholder'); + } +}; + +const validateMapFunction = (map) => { + if (!_.isFunction(map) || map.length !== 1) { + throw new Error('invalid map function'); + } +}; + +const validateDelimiter = (delim) => { + if (!_.isString(delim) || _.isEmpty(delim)) { + throw new Error('invalid delimiter'); + } +}; + +const validateCount = (count) => { + if (!_.isFinite(count) || count <= 0) { + throw new Error('invalid count'); + } +}; + +const validateTotalRequiredCounts = (conditions, counts) => { + if (conditions.length !== counts.length) { + throw new Error(`invalid counts supplied ${counts.length}, requires ${conditions.length}`); + } +}; + +class DynamicQueryCache { + constructor(baseQuery){ + validateBaseQuery(baseQuery); + this.sql = baseQuery; + this.cache = {}; + this.conditions = []; + } + addDynamicCondition(placeholder, map, delimiter = 'OR') { + validatePlaceholder(placeholder, this.sql); + validateMapFunction(map); + validateDelimiter(delimiter); + this.conditions.push({placeholder, map, delimiter}); + } + _generateDynamicSQL(...dynamicCounts) { + dynamicCounts.forEach(validateCount); + validateTotalRequiredCounts(this.conditions, dynamicCounts); + let sql = this.sql; + dynamicCounts.forEach((count, i) => { + const condition = this.conditions[i]; + const replacement = _.times(count, condition.map).join(` ${condition.delimiter} `); + sql = sql.replace(condition.placeholder, replacement); + }); + return sql; + } + getStatement(db, ...dynamicCounts){ + const key = [db.name].concat(dynamicCounts).join(':'); + if (!_.has(this.cache, key)) { + 
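// cache miss: generate the SQL for this exact combination of counts and prepare it
// once; later calls with the same database and counts reuse the prepared statement.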
const sql = this._generateDynamicSQL(...dynamicCounts); + this.cache[key] = db.prepare(sql); + } + return this.cache[key]; + } +} + +module.exports = DynamicQueryCache; diff --git a/query/extract.js b/query/extract.js index f4953110..879d8c67 100644 --- a/query/extract.js +++ b/query/extract.js @@ -1,4 +1,4 @@ -const _ = require('lodash'); +const DynamicQueryCache = require('./DynamicQueryCache'); // maximum names to match on const MAX_NAMES = 10; @@ -19,24 +19,8 @@ const SQL = ` LIMIT ${MAX_MATCHES}; `; -// SQL prepared statements dont easily support variable length inputs. -// This function dynamically generates a SQL query based on the number -// of 'name' conditions required. -function generateDynamicSQL(nameCount) { - const conditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); - return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); -} - -// Reusing prepared statements can have a ~10% perf benefit -// Note: the cache is global and so must be unique per database. -const cache = []; -function statementCache(db, nameCount) { - const key = `${nameCount}:${db.name}`; - if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(nameCount)); - } - return cache[key]; -} +const cache = new DynamicQueryCache(SQL); +cache.addDynamicCondition('%%NAME_CONDITIONS%%', (i) => `(street.names.name=$name${i})`); module.exports = function( db, point, names ){ @@ -46,10 +30,10 @@ module.exports = function( db, point, names ){ } // total amount of names to consider for search - const nameCount = Math.min( names.length, MAX_NAMES ); + const nameCount = Math.min(names.length, MAX_NAMES); // use a prepared statement from cache (or generate one if not yet cached) - const stmt = statementCache(db, nameCount); + const stmt = cache.getStatement(db, nameCount); // query params const params = { diff --git a/query/lookup.js b/query/lookup.js index 9185868b..dceb06c4 100644 --- a/query/lookup.js +++ b/query/lookup.js @@ -1,4 +1,4 @@ -const _ = require('lodash'); +const DynamicQueryCache = require('./DynamicQueryCache'); // maximum names to match on const MAX_NAMES = 10; @@ -18,31 +18,12 @@ const SQL = ` LIMIT ${MAX_MATCHES} `; -// SQL prepared statements dont easily support variable length inputs. -// This function dynamically generates a SQL query based on the number -// of 'name' and 'point' conditions required. -function generateDynamicSQL(pointCount, nameCount) { - const nameConditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); - const pointConditions = _.times(pointCount, (i) => `( - street.rtree.minX<$point${i}x AND street.rtree.maxX>$point${i}x AND - street.rtree.minY<$point${i}y AND street.rtree.maxY>$point${i}y - )`); - - return SQL - .replace('%%POINT_CONDITIONS%%', pointConditions.join(' OR ')) - .replace('%%NAME_CONDITIONS%%', nameConditions.join(' OR ')); -} - -// Reusing prepared statements can have a ~10% perf benefit -// Note: the cache is global and so must be unique per database. 
-const cache = []; -function statementCache(db, pointCount, nameCount) { - const key = `${nameCount}:${pointCount}:${db.name}`; - if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(pointCount, nameCount)); - } - return cache[key]; -} +const cache = new DynamicQueryCache(SQL); +cache.addDynamicCondition('%%NAME_CONDITIONS%%', (i) => `(street.names.name=$name${i})`); +cache.addDynamicCondition('%%POINT_CONDITIONS%%', (i) => `( + street.rtree.minX<$point${i}x AND street.rtree.maxX>$point${i}x AND + street.rtree.minY<$point${i}y AND street.rtree.maxY>$point${i}y +)`); module.exports = function( db, names, points ){ @@ -55,8 +36,8 @@ module.exports = function( db, names, points ){ const nameCount = Math.min(names.length, MAX_NAMES); const pointCount = Math.min(points.length, MAX_POINTS); - // use a prepared statement from cache (or generate one if not yet cached) - const stmt = statementCache(db, pointCount, nameCount); + // get prepared statement from cache (or generate one if not yet cached) + const stmt = cache.getStatement(db, nameCount, pointCount); // create a variable array of params to bind to query var params = {}; diff --git a/query/near.js b/query/near.js index 56be9b8b..c011a446 100644 --- a/query/near.js +++ b/query/near.js @@ -1,3 +1,5 @@ +const DynamicQueryCache = require('./DynamicQueryCache'); + /** find all streets which have a bbox which envelops the specified point; regardless of their names. **/ @@ -14,13 +16,12 @@ const SQL = ` LIMIT ${MAX_MATCHES} `; -// prepared statement cache -var stmt; +const cache = new DynamicQueryCache(SQL); module.exports = ( db, point ) => { - // create prepared statement if one doesn't exist - if( !stmt ){ stmt = db.prepare( SQL ); } + // use a prepared statement from cache (or generate one if not yet cached) + const stmt = cache.getStatement(db); // execute statement return stmt.all({ diff --git a/query/search.js b/query/search.js index c722d2d9..895e9668 100644 --- a/query/search.js +++ b/query/search.js @@ -1,4 +1,4 @@ -const _ = require('lodash'); +const DynamicQueryCache = require('./DynamicQueryCache'); // maximum names to match on const MAX_NAMES = 10; @@ -53,24 +53,8 @@ const SQL = ` LIMIT ${MAX_MATCHES} `; -// SQL prepared statements dont easily support variable length inputs. -// This function dynamically generates a SQL query based on the number -// of 'name' conditions required. -function generateDynamicSQL(nameCount){ - const conditions = _.times(nameCount, (i) => `(street.names.name=$name${i})`); - return SQL.replace('%%NAME_CONDITIONS%%', conditions.join(' OR ')); -} - -// Reusing prepared statements can have a ~10% perf benefit -// Note: the cache is global and so must be unique per database. 
-const cache = []; -function statementCache(db, nameCount){ - const key = `${nameCount}:${db.name}`; - if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(nameCount)); - } - return cache[key]; -} +const cache = new DynamicQueryCache(SQL); +cache.addDynamicCondition('%%NAME_CONDITIONS%%', (i) => `(street.names.name=$name${i})`); module.exports = function( db, point, number, names ){ // error checking @@ -82,7 +66,7 @@ module.exports = function( db, point, number, names ){ const nameCount = Math.min( names.length, MAX_NAMES ); // use a prepared statement from cache (or generate one if not yet cached) - const stmt = statementCache(db, nameCount); + const stmt = cache.getStatement(db, nameCount); // query params const params = { diff --git a/query/street.js b/query/street.js index 5a9fcc4c..45b40186 100644 --- a/query/street.js +++ b/query/street.js @@ -1,34 +1,12 @@ -const _ = require('lodash'); - -const SQL = ` +const DynamicQueryCache = require('./DynamicQueryCache'); +const cache = new DynamicQueryCache(` SELECT * FROM polyline JOIN names ON polyline.id = names.id WHERE polyline.id IN ( %%IDS%% ) LIMIT 10 -`; - -// SQL prepared statements dont easily support variable length inputs. -// This function dynamically generates a SQL query based on the number -// of 'id' conditions required. -function generateDynamicSQL(idCount) { - const conditions = _.times(idCount, (i) => `?`); - return SQL.replace('%%IDS%%', conditions.join(',')); -} - -// Reusing prepared statements can have a ~10% perf benefit -// Note: the cache is global and so must be unique per database. -const cache = []; -function statementCache(db, idCount) { - const key = `${idCount}:${db.name}`; - if (!cache[key]) { - cache[key] = db.prepare(generateDynamicSQL(idCount)); - } - return cache[key]; -} - -module.exports = function( db, ids ){ - const stmt = statementCache(db, ids.length); +`); +cache.addDynamicCondition('%%IDS%%', (i) => `?`, ','); - // execute statement - return stmt.all(ids); +module.exports = (db, ids) => { + return cache.getStatement(db, ids.length).all(ids); }; diff --git a/test/_unit.js b/test/_unit.js index 627ce56e..7e8b2cf6 100644 --- a/test/_unit.js +++ b/test/_unit.js @@ -15,7 +15,8 @@ var tests = [ require('./stream/osm/convert.js'), require('./stream/osm/delimited_ranges.js'), // require('./script/js/adapter/CensusFTP'), - require('./script/js/adapter/CensusS3Mirror') + require('./script/js/adapter/CensusS3Mirror'), + require('./query/DynamicQueryCache') ]; tests.map(function(t) { diff --git a/test/query/DynamicQueryCache.js b/test/query/DynamicQueryCache.js new file mode 100644 index 00000000..6c274421 --- /dev/null +++ b/test/query/DynamicQueryCache.js @@ -0,0 +1,156 @@ +const DynamicQueryCache = require('../../query/DynamicQueryCache'); + +class MockDatabase { + constructor() { + this.name = 'mock'; + this.sql = ''; + } + prepare(sql) { + this.sql = sql; + return this; + } +} + +module.exports.tests = {}; + +module.exports.tests.interface = function (test) { + test('interface', (t) => { + t.equal(typeof DynamicQueryCache, 'function'); + t.equal(DynamicQueryCache.length, 1); + t.end(); + }); +}; + +module.exports.tests.constructor = function (test) { + test('constructor', (t) => { + var cache = new DynamicQueryCache(`SELECT 'test'`); + t.equal(cache.sql, `SELECT 'test'`); + t.deepEquals(cache.cache, {}); + t.deepEquals(cache.conditions, []); + t.end(); + }); + test('constructor - invalid base query', (t) => { + t.throws(() => { const cache = new DynamicQueryCache(undefined); }, /invalid base 
query/); + t.throws(() => { const cache = new DynamicQueryCache(null); }, /invalid base query/); + t.throws(() => { const cache = new DynamicQueryCache([]); }, /invalid base query/); + t.throws(() => { const cache = new DynamicQueryCache({}); }, /invalid base query/); + t.throws(() => { const cache = new DynamicQueryCache(1); }, /invalid base query/); + t.throws(() => { const cache = new DynamicQueryCache(''); }, /invalid base query/); + t.end(); + }); +}; + +module.exports.tests.addDynamicCondition = function (test) { + test('addDynamicCondition', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const placeholder = '%%TEST%%'; + const map = (i) => `$test${i}`; + cache.addDynamicCondition(placeholder, map); + t.deepEquals(cache.conditions, [{placeholder, map, delimiter: 'OR'}]); + t.end(); + }); + test('addDynamicCondition - invalid placeholder', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + t.throws(() => { cache.addDynamicCondition(undefined); }, /invalid placeholder/); + t.throws(() => { cache.addDynamicCondition(null); }, /invalid placeholder/); + t.throws(() => { cache.addDynamicCondition([]); }, /invalid placeholder/); + t.throws(() => { cache.addDynamicCondition({}); }, /invalid placeholder/); + t.throws(() => { cache.addDynamicCondition(1); }, /invalid placeholder/); + t.throws(() => { cache.addDynamicCondition(''); }, /invalid placeholder/); + + t.throws(() => { cache.addDynamicCondition('%%FOO%%', (a) => a); }, /sql does not contain placeholder/); + t.end(); + }); + test('addDynamicCondition - invalid map function', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const p = '%%TEST%%'; + t.throws(() => { cache.addDynamicCondition(p, undefined); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, null); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, []); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, {}); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, 1); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, ''); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, () => {}); }, /invalid map function/); + t.throws(() => { cache.addDynamicCondition(p, (a, b) => {}); }, /invalid map function/); + t.end(); + }); + test('addDynamicCondition - default delimiter', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const p = '%%TEST%%'; + const m = (a) => a; + cache.addDynamicCondition(p, m, undefined); + t.deepEquals(cache.conditions[0].delimiter, 'OR'); + t.end(); + }); + test('addDynamicCondition - invalid delimiter', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const p = '%%TEST%%'; + const m = (a) => a; + t.throws(() => { cache.addDynamicCondition(p, m, null); }, /invalid delimiter/); + t.throws(() => { cache.addDynamicCondition(p, m, []); }, /invalid delimiter/); + t.throws(() => { cache.addDynamicCondition(p, m, {}); }, /invalid delimiter/); + t.throws(() => { cache.addDynamicCondition(p, m, 1); }, /invalid delimiter/); + t.throws(() => { cache.addDynamicCondition(p, m, ''); }, /invalid delimiter/); + t.end(); + }); +}; + +module.exports.tests._generateDynamicSQL = function (test) { + test('_generateDynamicSQL - no conditions', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const sql = cache._generateDynamicSQL(); + t.equals(sql, `SELECT * WHERE 
%%TEST%%`); + t.end(); + }); + test('_generateDynamicSQL - no dynamic counts', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + cache.addDynamicCondition('%%TEST%%', (i) => `$test${i}`); + t.throws(() => { cache._generateDynamicSQL(); }, /invalid counts supplied 0, requires 1/ ); + t.end(); + }); + test('_generateDynamicSQL - with dynamic counts', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + cache.addDynamicCondition('%%TEST%%', (i) => `$test${i}`); + const sql = cache._generateDynamicSQL(3); + t.equals(sql, `SELECT * WHERE $test0 OR $test1 OR $test2`); + t.end(); + }); +}; + +module.exports.tests.getStatement = function (test) { + test('getStatement - no conditions', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const db = new MockDatabase(); + const stmt = cache.getStatement(db); + t.deepEquals(cache.cache, { mock: { name: 'mock', sql: 'SELECT * WHERE %%TEST%%' } }); + t.equals(stmt.sql, `SELECT * WHERE %%TEST%%`); + t.end(); + }); + test('getStatement - no dynamic counts', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const db = new MockDatabase(); + cache.addDynamicCondition('%%TEST%%', (i) => `$test${i}`); + t.throws(() => { cache.getStatement(db); }, /invalid counts supplied 0, requires 1/); + t.end(); + }); + test('getStatement - with dynamic counts', (t) => { + const cache = new DynamicQueryCache(`SELECT * WHERE %%TEST%%`); + const db = new MockDatabase(); + cache.addDynamicCondition('%%TEST%%', (i) => `$test${i}`); + const stmt = cache.getStatement(db, 3); + t.deepEquals(cache.cache, { 'mock:3': { name: 'mock', sql: `SELECT * WHERE $test0 OR $test1 OR $test2` } }); + t.equals(stmt.sql, `SELECT * WHERE $test0 OR $test1 OR $test2`); + t.end(); + }); +}; + +module.exports.all = function (tape) { + + function test(name, testFunction) { + return tape('DynamicQueryCache: ' + name, testFunction); + } + + for (var testCase in module.exports.tests) { + module.exports.tests[testCase](test); + } +}; From a588279bf5a170001413922744144ad79562d277 Mon Sep 17 00:00:00 2001 From: missinglink Date: Fri, 20 Mar 2020 15:57:11 +0100 Subject: [PATCH 4/8] fix(query): fix SQL syntax error --- cmd/server.js | 23 ++++++++++++++++++----- query/extract.js | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/cmd/server.js b/cmd/server.js index 5ef42629..5b6033f6 100644 --- a/cmd/server.js +++ b/cmd/server.js @@ -52,6 +52,19 @@ function log() { }); } +// handle errors as either Error objects or strings +function formatError( err ) { + if (err instanceof Error) { + return { + type: err.name, + message: err.message + }; + } + return { + message: err + }; +} + // search with geojson view // eg: http://localhost:3000/search/geojson?lat=-41.288788&lon=174.766843&number=16&street=glasgow%20street app.get('/search/geojson', function( req, res ){ @@ -61,7 +74,7 @@ app.get('/search/geojson', function( req, res ){ var street = req.query.street; conn.search.query( point, number, street, function( err, point ){ - if( err ){ return res.status(400).json( err ); } + if( err ){ return res.status(400).json( formatError( err ) ); } if( !point ){ return res.status(200).json({}); } res.json( pretty.geojson.point( point, point.lon, point.lat ) ); @@ -77,7 +90,7 @@ app.get('/search/table', function( req, res ){ var street = req.query.street; conn.search.query( point, number, street, function( err, point ){ - if( err ){ return res.status(400).json( err ); } + if( err ){ return 
res.status(400).json( formatError( err ) ); } if( !point ){ return res.status(200).send(''); } res.setHeader('Content-Type', 'text/html'); @@ -93,7 +106,7 @@ app.get('/extract/geojson', function( req, res ){ var names = req.query.names ? req.query.names.split(',') : []; conn.extract.query( point, names, function( err, data ){ - if( err ){ return res.status(400).json( err ); } + if( err ){ return res.status(400).json( formatError( err ) ); } if( !data ){ return res.status(200).json({}); } res.json( pretty.geojson( data ) ); @@ -125,7 +138,7 @@ app.get('/street/near/geojson', function( req, res ){ var max_distance = req.query.dist || 0.01; conn.near.query( point, function( err, ordered ){ - if( err ){ return res.status(400).json( err ); } + if( err ){ return res.status(400).json( formatError( err ) ); } if( !ordered || !ordered.length ){ return res.status(200).json({}); } // remove points over a certain distance (in degrees) @@ -161,7 +174,7 @@ app.get('/street/near/geojson', function( req, res ){ app.get('/street/:id/geojson', function( req, res ){ conn.street.query( req.params.id.split(','), function( err, rows ){ - if( err ){ return res.status(400).json( err ); } + if( err ){ return res.status(400).json( formatError( err ) ); } if( !rows || !rows.length ){ return res.status(200).json({}); } // dedupe diff --git a/query/extract.js b/query/extract.js index 879d8c67..890a2bb2 100644 --- a/query/extract.js +++ b/query/extract.js @@ -15,7 +15,7 @@ const SQL = ` street.rtree.minY<=$lat AND street.rtree.maxY>=$lat ) AND ( %%NAME_CONDITIONS%% ) - ORDER BY address.housenumber ASC // @warning business logic depends on this + ORDER BY address.housenumber ASC -- @warning business logic depends on this LIMIT ${MAX_MATCHES}; `; From 4d381b9edcb62c75d19f5ebe949cf0c4368f88dd Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 17 Mar 2020 15:24:08 +0100 Subject: [PATCH 5/8] feat(async): async-ify analyze.street() --- .jshintrc | 2 +- api/extract.js | 7 ++++--- api/search.js | 4 ++-- cmd/server.js | 4 ++-- lib/analyze.js | 4 ++-- lib/asyncForEach.js | 8 ++++++++ stream/address/lookup.js | 4 ++-- stream/street/augment.js | 7 ++++--- test/lib/analyze.js | 12 ++++++------ 9 files changed, 31 insertions(+), 21 deletions(-) create mode 100644 lib/asyncForEach.js diff --git a/.jshintrc b/.jshintrc index 159fb604..aa3f1574 100644 --- a/.jshintrc +++ b/.jshintrc @@ -2,7 +2,7 @@ "node": true, "curly": true, "eqeqeq": true, - "esversion": 6, + "esversion": 8, "freeze": true, "immed": true, "indent": 2, diff --git a/api/extract.js b/api/extract.js index 4e9f187b..add15f0f 100644 --- a/api/extract.js +++ b/api/extract.js @@ -1,6 +1,7 @@ const Database = require('better-sqlite3'); const query = { extract: require('../query/extract') }; const analyze = require('../lib/analyze'); +const asyncForEach = require('../lib/asyncForEach'); // export setup method function setup( addressDbPath, streetDbPath ){ @@ -12,7 +13,7 @@ function setup( addressDbPath, streetDbPath ){ db.exec(`ATTACH DATABASE '${streetDbPath}' as 'street'`); // query method - var q = function( coord, names, cb ){ + var q = async function( coord, names, cb ){ var point = { lat: parseFloat( coord.lat ), @@ -20,8 +21,8 @@ function setup( addressDbPath, streetDbPath ){ }; var normalized = []; - names.forEach( function( name ){ - normalized = normalized.concat( analyze.street( name ) ); + await asyncForEach(names, async (name) => { + normalized = normalized.concat( await analyze.street( name ) ); }); // error checking diff --git a/api/search.js b/api/search.js 
index dfbfb829..02f02b3a 100644 --- a/api/search.js +++ b/api/search.js @@ -19,7 +19,7 @@ function setup( addressDbPath, streetDbPath ){ db.exec('PRAGMA street.mmap_size=268435456;'); // query method - var q = function( coord, number, street, cb ){ + var q = async function( coord, number, street, cb ){ var point = { lat: parseFloat( coord.lat ), @@ -31,7 +31,7 @@ function setup( addressDbPath, streetDbPath ){ var normalized = { number: analyze.housenumber( number ), - street: analyze.street( street ) + street: await analyze.street( street ) }; // error checking diff --git a/cmd/server.js b/cmd/server.js index 5b6033f6..ca30f962 100644 --- a/cmd/server.js +++ b/cmd/server.js @@ -217,10 +217,10 @@ app.use('/demo', express.static('demo')); // app.use('/builds', express.static('/data/builds')); // app.use('/builds', directory('/data/builds', { hidden: false, icons: false, view: 'details' })); -app.listen( PORT, function() { +app.listen( PORT, async function() { // force loading of libpostal - analyze.street( 'test street' ); + await analyze.street( 'test street' ); console.log( 'server listening on port', PORT ); }); diff --git a/lib/analyze.js b/lib/analyze.js index cc13cf99..4eea5a49 100644 --- a/lib/analyze.js +++ b/lib/analyze.js @@ -31,11 +31,11 @@ function get_libpostal() { /** analyze input streetname string and return a list of expansions. **/ -function street( streetName ){ +async function street( streetName ){ const postal = get_libpostal(); // use libpostal to expand the address - var expansions = postal.expand.expand_address( streetName ); + let expansions = await postal.expand.expand_address( streetName ); // remove ordinals expansions = expansions.map(function( item ){ diff --git a/lib/asyncForEach.js b/lib/asyncForEach.js new file mode 100644 index 00000000..69c6cec7 --- /dev/null +++ b/lib/asyncForEach.js @@ -0,0 +1,8 @@ +// async friendly version of Array.forEach +async function asyncForEach(array, callback) { + for (let index = 0; index < array.length; index++) { + await callback(array[index], index, array); + } +} + +module.exports = asyncForEach; diff --git a/stream/address/lookup.js b/stream/address/lookup.js index 45400803..10d0fee1 100644 --- a/stream/address/lookup.js +++ b/stream/address/lookup.js @@ -17,7 +17,7 @@ if( hasFD3 ){ function streamFactory(db){ - return through.obj(function( batch, _, next ){ + return through.obj(async function( batch, _, next ){ // invalid batch if( !batch || !batch.length ){ @@ -30,7 +30,7 @@ function streamFactory(db){ // all street names in batch should be the same // perform libpostal normalization - var names = analyze.street( result.getStreet() ); + var names = await analyze.street( result.getStreet() ); // ensure at least one name was produced if( !names.length ){ diff --git a/stream/street/augment.js b/stream/street/augment.js index 84564222..31e7bdf2 100644 --- a/stream/street/augment.js +++ b/stream/street/augment.js @@ -1,5 +1,6 @@ const through = require('through2'); const analyze = require('../../lib/analyze'); +const asyncForEach = require('../../lib/asyncForEach'); // increase/decrease bbox bounds by this much in order to find houses which // might be slighly outside the bounds. @@ -15,12 +16,12 @@ const FUDGE_FACTOR = 0.005; **/ // the transform function is executed once per batch in the stream. 
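// note: asyncForEach awaits each callback in series (rather than in parallel via
// Promise.all), which preserves the original order of the expanded names.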
-const transform = (street, _, next) => { +const transform = async (street, _, next) => { // normalize all names let names = []; - street.getNames().forEach(function (name) { - names = names.concat(analyze.street(name)); + await asyncForEach(street.getNames(), async (name) => { + names = names.concat(await analyze.street(name)); }); // if the source file contains no valid names for this polyline diff --git a/test/lib/analyze.js b/test/lib/analyze.js index 9ae15f6f..8590cbb0 100644 --- a/test/lib/analyze.js +++ b/test/lib/analyze.js @@ -4,18 +4,18 @@ var analyze = require('../../lib/analyze'); module.exports.analyze = {}; module.exports.analyze.street = function(test) { - test('street: synonym expansions', function(t) { - var perms = analyze.street('grolmanstraße'); + test('street: synonym expansions', async function(t) { + var perms = await analyze.street('grolmanstraße'); t.deepEqual(perms, ['grolmanstraße', 'grolman straße']); t.end(); }); - test('street: remove ordinals', function(t) { - var perms = analyze.street('West 26th st'); + test('street: remove ordinals', async function(t) { + var perms = await analyze.street('West 26th st'); t.deepEqual(perms, ['west 26 street', 'west 26 saint']); t.end(); }); - test('street: always returns array', function(t) { - var perms = analyze.street(''); + test('street: always returns array', async function(t) { + var perms = await analyze.street(''); t.deepEqual(perms, ['']); t.end(); }); From 12422aea88027664e5f5995f0a76bb1870408e8b Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 17 Mar 2020 17:02:32 +0100 Subject: [PATCH 6/8] feat(libpostal): add support for libpostal over http service, adapter pattern et al. --- lib/analyze.js | 30 +- libpostal/LibpostalServiceConfig.js | 17 + libpostal/client.js | 21 + libpostal/fixture/libpostal_responses.json | 1419 ++++++++++++++++++++ libpostal/mock.js | 45 + libpostal/module.js | 17 + libpostal/service.js | 31 + package.json | 1 + readme.md | 3 - test/lib/mock_libpostal.js | 42 - 10 files changed, 1556 insertions(+), 70 deletions(-) create mode 100644 libpostal/LibpostalServiceConfig.js create mode 100644 libpostal/client.js create mode 100644 libpostal/fixture/libpostal_responses.json create mode 100644 libpostal/mock.js create mode 100644 libpostal/module.js create mode 100644 libpostal/service.js delete mode 100644 test/lib/mock_libpostal.js diff --git a/lib/analyze.js b/lib/analyze.js index 4eea5a49..63bb21ce 100644 --- a/lib/analyze.js +++ b/lib/analyze.js @@ -1,38 +1,18 @@ +const postal = require('../libpostal/client'); + // constants for controlling how we parse ranges, eg: 'α-β' // some ranges such as '1-7' are ambiguous; it could mean 'apt 7, no 1'; or // it could mean 'apt 1, no 7'; or could even be a valid range 'one to seven'. // note: these values provide a means of setting some sane defaults for which // ranges we try to parse and which ones we leave. -var MIN_RANGE = 1; // the miniumum amount β is higher than α -var MAX_RANGE = 6; // the maximum amount β is higher than α -var MIN_RANGE_HOUSENUMBER = 10; // the minimum acceptible value for both α and β - -/* - * Return the appropriate version of node-postal - */ - -var _nodepostal_module; -function get_libpostal() { - // lazy load this dependency; since it's large (~2GB RAM) and may be - // accidentally required by a process which doesn't use it. 
- if (!_nodepostal_module) { - // load the mock library if MOCK_LIBPOSTAL env var is set - if (process.env.MOCK_LIBPOSTAL) { - _nodepostal_module = require('../test/lib/mock_libpostal'); - // otherwise load the real thing - } else { - _nodepostal_module = require('node-postal'); - } - } - - return _nodepostal_module; -} +const MIN_RANGE = 1; // the miniumum amount β is higher than α +const MAX_RANGE = 6; // the maximum amount β is higher than α +const MIN_RANGE_HOUSENUMBER = 10; // the minimum acceptible value for both α and β /** analyze input streetname string and return a list of expansions. **/ async function street( streetName ){ - const postal = get_libpostal(); // use libpostal to expand the address let expansions = await postal.expand.expand_address( streetName ); diff --git a/libpostal/LibpostalServiceConfig.js b/libpostal/LibpostalServiceConfig.js new file mode 100644 index 00000000..489857e2 --- /dev/null +++ b/libpostal/LibpostalServiceConfig.js @@ -0,0 +1,17 @@ +const ServiceConfiguration = require('pelias-microservice-wrapper').ServiceConfiguration; + +class LibpostalServiceConfig extends ServiceConfiguration { + constructor(config) { + super('libpostal', config); + } + getUrl(params) { + return this.baseUrl + params.endpoint; + } + getParameters(params) { + return { + address: params.address + }; + } +} + +module.exports = LibpostalServiceConfig; diff --git a/libpostal/client.js b/libpostal/client.js new file mode 100644 index 00000000..12a42d64 --- /dev/null +++ b/libpostal/client.js @@ -0,0 +1,21 @@ +/* + * Return the appropriate version of node-postal + */ + +const config = require('pelias-config').generate(); +const serviceIsConfigured = config.get('services.libpostal') || config.get('api.services.libpostal'); + +// load the mock library if MOCK_LIBPOSTAL env var is set +if (process.env.MOCK_LIBPOSTAL) { + module.exports = require('./mock'); +} + +// else use the HTTP webservice when configured +else if (serviceIsConfigured) { + module.exports = require('./service'); +} + +// otherwise use the npm module +else { + module.exports = require('./module'); +} diff --git a/libpostal/fixture/libpostal_responses.json b/libpostal/fixture/libpostal_responses.json new file mode 100644 index 00000000..79572c8d --- /dev/null +++ b/libpostal/fixture/libpostal_responses.json @@ -0,0 +1,1419 @@ +{ + "glasgow street": [ + "glasgow street" + ], + "grolmanstrasse": [ + "grolmanstraße", + "grolman straße" + ], + "west 26th street": [ + "west 26th street" + ], + "rigaer strasse": [ + "rigaer strasse" + ], + "markgrafenstrasse": [ + "markgrafenstraße", + "markgrafen straße" + ], + "potsdamer platz": [ + "potsdamer platz" + ], + "nevern square": [ + "nevern square" + ], + "cemetery road": [ + "cemetery road" + ], + "grolmanstraße": [ + "grolmanstraße", + "grolman straße" + ], + "southwest 26th street": [ + "southwest 26th street" + ], + "northwest 26th street": [ + "northwest 26th street" + ], + "cr 34": [ + "creek 34", + "county road 34", + "county route 34", + "crescent 34" + ], + "west 26th street place": [ + "west 26th street place" + ], + "us 20": [ + "us 20" + ], + "west 26th street road": [ + "west 26th street road" + ], + "west 26 street": [ + "west 26 street" + ], + "rigaer straße": [ + "rigaer strasse" + ], + "markgrafenstraße": [ + "markgrafenstraße", + "markgrafen straße" + ], + "potsdamer straße": [ + "potsdamer strasse" + ], + "l 69": [ + "left 69", + "lane 69", + "l 69", + "50 69", + "level 69", + "links 69", + "lang 69", + "lange 69" + ], + "alte potsdamer straße": [ + "alte 
potsdamer strasse" + ], + "b 1": [ + "b 1", + "bei 1" + ], + "s+u potsdamer platz": [ + "s+u potsdamer platz" + ], + "lützowstraße/potsdamer straße": [ + "luetzowstraße potsdamer strasse", + "luetzow straße potsdamer strasse", + "lutzowstraße potsdamer strasse", + "lutzow straße potsdamer strasse" + ], + "s potsdamer platz/voßstraße": [ + "sud potsdamer platz vossstraße", + "s potsdamer platz vossstraße", + "see potsdamer platz vossstraße", + "sud potsdamer platz voss straße", + "s potsdamer platz voss straße", + "see potsdamer platz voss straße" + ], + "potsdamer platz arkaden": [ + "potsdamer platz arkaden" + ], + "l 204": [ + "left 204", + "lane 204", + "l 204", + "50 204", + "level 204", + "links 204", + "lang 204", + "lange 204" + ], + "l 40": [ + "left 40", + "lane 40", + "l 40", + "50 40", + "level 40", + "links 40", + "lang 40", + "lange 40" + ], + "kleine potsdamer straße": [ + "kleine potsdamer strasse" + ], + "l 79": [ + "left 79", + "lane 79", + "l 79", + "50 79", + "level 79", + "links 79", + "lang 79", + "lange 79" + ], + "goethestraße/potsdamer straße": [ + "goethestraße potsdamer strasse", + "goethe straße potsdamer strasse" + ], + "b 273": [ + "b 273", + "bei 273" + ], + "k 6960": [ + "k 6960", + "kalea 6960", + "kamp 6960", + "katu 6960" + ], + "l 77": [ + "left 77", + "lane 77", + "l 77", + "50 77", + "level 77", + "links 77", + "lang 77", + "lange 77" + ], + "l 78": [ + "left 78", + "lane 78", + "l 78", + "50 78", + "level 78", + "links 78", + "lang 78", + "lange 78" + ], + "k 6909": [ + "k 6909", + "kalea 6909", + "kamp 6909", + "katu 6909" + ], + "l 90": [ + "left 90", + "lane 90", + "l 90", + "50 90", + "level 90", + "links 90", + "lang 90", + "lange 90" + ], + "l 86": [ + "left 86", + "lane 86", + "l 86", + "50 86", + "level 86", + "links 86", + "lang 86", + "lange 86" + ], + "l 92": [ + "left 92", + "lane 92", + "l 92", + "50 92", + "level 92", + "links 92", + "lang 92", + "lange 92" + ], + "b 102": [ + "b 102", + "bei 102" + ], + "k 7220": [ + "k 7220", + "kalea 7220", + "kamp 7220", + "katu 7220" + ], + "k 7221": [ + "k 7221", + "kalea 7221", + "kamp 7221", + "katu 7221" + ], + "k 7": [ + "k 7", + "kalea 7", + "kamp 7", + "katu 7" + ], + "willow avenue": [ + "willow avenue" + ], + "cr 634": [ + "creek 634", + "county road 634", + "county route 634", + "crescent 634" + ], + "brookwillow avenue": [ + "brookwillow avenue" + ], + "willow ave": [ + "willow avenue" + ], + "1016-18 willow avenue": [ + "1016-18 willow avenue" + ], + "1030-1032 willow avenue": [ + "1030-1032 willow avenue" + ], + "1109-1121 willow avenue": [ + "1109-1121 willow avenue" + ], + "112-14 willow avenue": [ + "112-14 willow avenue" + ], + "116-118 willow avenue": [ + "116-118 willow avenue" + ], + "1200-22 willow avenue": [ + "1200-22 willow avenue" + ], + "1203-19 willow avenue": [ + "1203-19 willow avenue" + ], + "124-128 willow avenue": [ + "124-128 willow avenue" + ], + "1300-14 willow avenue": [ + "1300-14 willow avenue" + ], + "1316-1330 willow avenue": [ + "1316-1330 willow avenue" + ], + "1317-1319 willow avenue": [ + "1317-1319 willow avenue" + ], + "1401-1407 willow avenue": [ + "1401-1407 willow avenue" + ], + "1404-12 willow avenue": [ + "1404-12 willow avenue" + ], + "1409-11 willow avenue": [ + "1409-11 willow avenue" + ], + "1413-1425 willow avenue": [ + "1413-1425 willow avenue" + ], + "1414-1418 willow avenue": [ + "1414-1418 willow avenue" + ], + "1420-1424 willow avenue": [ + "1420-1424 willow avenue" + ], + "1426-28 willow avenue": [ + "1426-28 willow avenue" + ], + 
"1427-1429 willow avenue": [ + "1427-1429 willow avenue" + ], + "147-51 willow avenue": [ + "147-51 willow avenue" + ], + "1501-1503 willow avenue": [ + "1501-1503 willow avenue" + ], + "1502-1506 willow avenue": [ + "1502-1506 willow avenue" + ], + "1508-1510 willow avenue": [ + "1508-1510 willow avenue" + ], + "1512-1522 willow avenue": [ + "1512-1522 willow avenue" + ], + "1524-1530 willow avenue": [ + "1524-1530 willow avenue" + ], + "1612-1614 willow avenue": [ + "1612-1614 willow avenue" + ], + "1622-28 willow avenue": [ + "1622-28 willow avenue" + ], + "167-169 willow avenue": [ + "167-169 willow avenue" + ], + "1700-02 willow avenue": [ + "1700-02 willow avenue" + ], + "1714-16 willow avenue": [ + "1714-16 willow avenue" + ], + "175-177 willow avenue": [ + "175-177 willow avenue" + ], + "1801-13 willow avenue": [ + "1801-13 willow avenue" + ], + "181-183 willow avenue": [ + "181-183 willow avenue" + ], + "182-184 willow avenue": [ + "182-184 willow avenue" + ], + "18oo willow avenue": [ + "18oo willow avenue" + ], + "19 willow avenue": [ + "19 willow avenue" + ], + "200-202 willow avenue": [ + "200-202 willow avenue" + ], + "201-215 willow avenue": [ + "201-215 willow avenue" + ], + "20-22 willow avenue": [ + "20-22 willow avenue" + ], + "203-205 willow avenue": [ + "203-205 willow avenue" + ], + "208-212 willow avenue": [ + "208-212 willow avenue" + ], + "214-216 willow avenue": [ + "214-216 willow avenue" + ], + "223-25 willow avenue": [ + "223-25 willow avenue" + ], + "224-226 willow avenue": [ + "224-226 willow avenue" + ], + "225a willow avenue": [ + "225a willow avenue" + ], + "27-29 willow avenue": [ + "27-29 willow avenue" + ], + "27 willow avenue": [ + "27 willow avenue" + ], + "28-30 willow avenue": [ + "28-30 willow avenue" + ], + "300-336 willow avenue": [ + "300-336 willow avenue" + ], + "308a willow avenue": [ + "308a willow avenue" + ], + "309-311 willow avenue": [ + "309-311 willow avenue" + ], + "311-313 willow avenue": [ + "311-313 willow avenue" + ], + "319a willow avenue": [ + "319a willow avenue" + ], + "321a willow avenue": [ + "321a willow avenue" + ], + "322a willow avenue": [ + "322a willow avenue" + ], + "32-34 willow avenue": [ + "32-34 willow avenue" + ], + "323a willow avenue": [ + "323a willow avenue" + ], + "324a willow avenue": [ + "324a willow avenue" + ], + "325-327 willow avenue": [ + "325-327 willow avenue" + ], + "33 willow avenue": [ + "33 willow avenue" + ], + "34 willow avenue": [ + "34 willow avenue" + ], + "400a willow avenue": [ + "400a willow avenue" + ], + "413a willow avenue": [ + "413a willow avenue" + ], + "424a willow avenue": [ + "424a willow avenue" + ], + "430a willow avenue": [ + "430a willow avenue" + ], + "453a willow avenue": [ + "453a willow avenue" + ], + "455a willow avenue": [ + "455a willow avenue" + ], + "458a willow avenue": [ + "458a willow avenue" + ], + "524-34 willow avenue": [ + "524-34 willow avenue" + ], + "529-533 willow avenue": [ + "529-533 willow avenue" + ], + "54-56 willow avenue": [ + "54-56 willow avenue" + ], + "5 willow avenue": [ + "5 willow avenue" + ], + "605-607 willow avenue": [ + "605-607 willow avenue" + ], + "619-621 willow avenue": [ + "619-621 willow avenue" + ], + "708-10 willow avenue": [ + "708-10 willow avenue" + ], + "708-710 willow avenue": [ + "708-710 willow avenue" + ], + "711-13 willow avenue": [ + "711-13 willow avenue" + ], + "712-14 willow avenue": [ + "712-14 willow avenue" + ], + "725-727 willow avenue": [ + "725-727 willow avenue" + ], + "730-732 willow avenue": [ + "730-732 
willow avenue" + ], + "736-738 willow avenue": [ + "736-738 willow avenue" + ], + "77-83 willow avenue": [ + "77-83 willow avenue" + ], + "800-812 willow avenue": [ + "800-812 willow avenue" + ], + "832-834 willow avenue": [ + "832-834 willow avenue" + ], + "835-837 willow avenue": [ + "835-837 willow avenue" + ], + "89-91 willow avenue": [ + "89-91 willow avenue" + ], + "902-904 willow avenue": [ + "902-904 willow avenue" + ], + "913-915 willow avenue": [ + "913-915 willow avenue" + ], + "918-920 willow avenue": [ + "918-920 willow avenue" + ], + "end willow avenue": [ + "end willow avenue" + ], + "laurel place & willow avenue": [ + "laurel place & willow avenue" + ], + "maple & willow aves": [ + "maple & willow avenues" + ], + "west willow avenue": [ + "west willow avenue" + ], + "willow ave/257 13th": [ + "willow avenue 257 13th" + ], + "willow ave/260 eighth": [ + "willow avenue 260 8th" + ], + "willow ave/aka265 7th": [ + "willow avenue aka265 7th", + "willow avenue aka 265 7th" + ], + "willow avenue (a & b)": [ + "willow avenue a & b" + ], + "willow avenue extension": [ + "willow avenue extension" + ], + "willow avenue (lake)": [ + "willow avenue lake " + ], + "willow avenue & railroad": [ + "willow avenue & railroad" + ], + "willow avenue rear": [ + "willow avenue rear" + ], + "willow avenue (rear)": [ + "willow avenue rear " + ], + "willow avenue - rear": [ + "willow avenue rear" + ], + "nevern road": [ + "nevern road" + ], + "nevern place": [ + "nevern place" + ], + "lingshire rd": [ + "lingshire road" + ], + "smith river rd": [ + "smith river road" + ], + "butte creek rd": [ + "butte creek road" + ], + "fort logan rd": [ + "fort logan road" + ], + "hwy 360": [ + "highway 360" + ], + "state hwy 360": [ + "state hwy 360" + ], + "canyon rd": [ + "canyon road" + ], + "birch creek rd": [ + "birch creek road" + ], + "big sky ln": [ + "big sky lane", + "big sky line" + ], + "birky rd": [ + "birky road" + ], + "rostad rd": [ + "rostad road" + ], + "us hwy 89": [ + "us highway 89" + ], + "us hwy 12 e": [ + "us highway 12 east", + "us highway 12 e" + ], + "feddes rd": [ + "feddes road" + ], + "bonanza creek rd": [ + "bonanza creek road" + ], + "nat for dev rd 585": [ + "nat for dev road 585" + ], + "state hwy 294": [ + "state hwy 294" + ], + "cottonwood creek rd": [ + "cottonwood creek road" + ], + "findon ln": [ + "findon lane", + "findon line" + ], + "main st": [ + "main street", + "main saint" + ], + "grant ave": [ + "grant avenue" + ], + "b st": [ + "b st" + ], + "c st": [ + "center street", + "center saint", + "central street", + "central saint", + "c street", + "100 street", + "c saint", + "100 saint", + "centre street", + "centre saint" + ], + "merino ave": [ + "merino avenue", + "merino avenida" + ], + "2nd st": [ + "2nd street", + "2nd saint" + ], + "1st st": [ + "1st street", + "1st saint" + ], + "3rd ave": [ + "3rd avenue" + ], + "new dorsey rd": [ + "new dorsey road" + ], + "railroad ave": [ + "railroad avenue" + ], + "2nd ave": [ + "2nd avenue" + ], + "forest lake rd": [ + "forest lake road" + ], + "crazy m ranch rd": [ + "crazy m ranch road", + "crazy 1000 ranch road" + ], + "brekey rd": [ + "brekey road" + ], + "smith creek rd": [ + "smith creek road" + ], + "schendel rd": [ + "schendel road" + ], + "mike day dr": [ + "mike day drive", + "mike day doctor" + ], + "mountain view trl": [ + "mountain view trail" + ], + "sixteen mile rd": [ + "16 mile road" + ], + "co rd 119": [ + "county road 119" + ], + "sheep creek rd": [ + "sheep creek road" + ], + "newlan creek rd": [ + 
"newlan creek road" + ], + "stud horse rd": [ + "stud horse road" + ], + "bingham ln": [ + "bingham lane", + "bingham line" + ], + "jackson ln rd": [ + "jackson lane road", + "jackson line road" + ], + "monroe st": [ + "monroe street", + "monroe saint" + ], + "1st ave nw": [ + "1st avenue northwest", + "1st avenue nw" + ], + "n central ave": [ + "north central avenida", + "north central avenue", + "n central avenida", + "n central avenue", + "nosso central avenida", + "nosso central avenue", + "norte central avenida", + "norte central avenue" + ], + "1st ave ne": [ + "1st avenue ne", + "1st avenue nebraska", + "1st avenue northeast" + ], + "e baker st": [ + "east baker street", + "east baker saint", + "e baker street", + "e baker saint" + ], + "e grove st": [ + "east grove street", + "east grove saint", + "e grove street", + "e grove saint" + ], + "e woodson st": [ + "east woodson street", + "east woodson saint", + "e woodson street", + "e woodson saint" + ], + "2nd ave ne": [ + "2nd avenue ne", + "2nd avenue nebraska", + "2nd avenue northeast" + ], + "3rd ave ne": [ + "3rd avenue ne", + "3rd avenue nebraska", + "3rd avenue northeast" + ], + "e laramie st": [ + "east laramie street", + "east laramie saint", + "e laramie street", + "e laramie saint" + ], + "e wright st": [ + "east wright street", + "east wright saint", + "e wright street", + "e wright saint" + ], + "e washington st": [ + "east washington street", + "east washington saint", + "e washington street", + "e washington saint" + ], + "4th ave ne": [ + "4th avenue ne", + "4th avenue nebraska", + "4th avenue northeast" + ], + "5th ave ne": [ + "5th avenue ne", + "5th avenue nebraska", + "5th avenue northeast" + ], + "e hampton st": [ + "east hampton street", + "east hampton saint", + "e hampton street", + "e hampton saint" + ], + "6th ave ne": [ + "6th avenue ne", + "6th avenue nebraska", + "6th avenue northeast" + ], + "w main st": [ + "west main street", + "west main saint", + "w main street", + "w main saint" + ], + "e main st": [ + "east main street", + "east main saint", + "e main street", + "e main saint" + ], + "main st w": [ + "main street west", + "main street w", + "main saint west", + "main saint w" + ], + "2nd ave se": [ + "2nd avenue european company", + "2nd avenue southeast", + "2nd avenue suite", + "2nd avenue se" + ], + "3rd ave se": [ + "3rd avenue european company", + "3rd avenue southeast", + "3rd avenue suite", + "3rd avenue se" + ], + "e houston st": [ + "east houston street", + "east houston saint", + "e houston street", + "e houston saint" + ], + "4th ave se": [ + "4th avenue european company", + "4th avenue southeast", + "4th avenue suite", + "4th avenue se" + ], + "e lincoln st": [ + "east lincoln street", + "east lincoln saint", + "e lincoln street", + "e lincoln saint" + ], + "5th ave se": [ + "5th avenue european company", + "5th avenue southeast", + "5th avenue suite", + "5th avenue se" + ], + "1st ave se": [ + "1st avenue european company", + "1st avenue southeast", + "1st avenue suite", + "1st avenue se" + ], + "e chilton st": [ + "east chilton street", + "east chilton saint", + "e chilton street", + "e chilton saint" + ], + "e jefferson st": [ + "east jefferson street", + "east jefferson saint", + "e jefferson street", + "e jefferson saint" + ], + "10th ave sw": [ + "10th avenue southwest", + "10th avenue sw" + ], + "s central ave": [ + "south central avenida", + "south central avenue", + "san central avenida", + "san central avenue", + "s central avenida", + "s central avenue", + "sul central 
avenida", + "sul central avenue", + "sao central avenida", + "sao central avenue", + "senhor central avenida", + "senhor central avenue" + ], + "e alabama st": [ + "east alabama street", + "east alabama saint", + "e alabama street", + "e alabama saint" + ], + "e brown st": [ + "east brown street", + "east brown saint", + "e brown street", + "e brown saint" + ], + "5th ave sw": [ + "5th avenue southwest", + "5th avenue sw" + ], + "e crawford st": [ + "east crawford street", + "east crawford saint", + "e crawford street", + "e crawford saint" + ], + "sw maginnis st": [ + "southwest maginnis street", + "southwest maginnis saint", + "sw maginnis street", + "sw maginnis saint" + ], + "4th ave sw": [ + "4th avenue southwest", + "4th avenue sw" + ], + "3rd ave sw": [ + "3rd avenue southwest", + "3rd avenue sw" + ], + "e maginnis st": [ + "east maginnis street", + "east maginnis saint", + "e maginnis street", + "e maginnis saint" + ], + "folsom st": [ + "folsom street", + "folsom saint" + ], + "sw folsom st": [ + "southwest folsom street", + "southwest folsom saint", + "sw folsom street", + "sw folsom saint" + ], + "se folsom st": [ + "european company folsom street", + "european company folsom saint", + "southeast folsom street", + "southeast folsom saint", + "suite folsom street", + "suite folsom saint", + "se folsom street", + "se folsom saint" + ], + "w folsom st": [ + "west folsom street", + "west folsom saint", + "w folsom street", + "w folsom saint" + ], + "e folsom st": [ + "east folsom street", + "east folsom saint", + "e folsom street", + "e folsom saint" + ], + "e hancock st": [ + "east hancock street", + "east hancock saint", + "e hancock street", + "e hancock saint" + ], + "sw garfield st": [ + "southwest garfield street", + "southwest garfield saint", + "sw garfield street", + "sw garfield saint" + ], + "w garfield st": [ + "west garfield street", + "west garfield saint", + "w garfield street", + "w garfield saint" + ], + "e garfield st": [ + "east garfield street", + "east garfield saint", + "e garfield street", + "e garfield saint" + ], + "e south st": [ + "east south street", + "east south saint", + "e south street", + "e south saint" + ], + "6th ave sw": [ + "6th avenue southwest", + "6th avenue sw" + ], + "slaughter house rd": [ + "slaughter house road" + ], + "ramspeck ln": [ + "ramspeck lane", + "ramspeck line", + "ramspeck laan" + ], + "jackson ln": [ + "jackson lane", + "jackson line" + ], + "slaughter house ln": [ + "slaughter house lane", + "slaughter house line" + ], + "luppold rd": [ + "luppold road" + ], + "1st ave sw": [ + "1st avenue southwest", + "1st avenue sw" + ], + "sw 1st ave": [ + "southwest 1st avenue", + "sw 1st avenue" + ], + "sw south st": [ + "southwest south street", + "southwest south saint", + "sw south street", + "sw south saint" + ], + "2nd ave sw": [ + "2nd avenue southwest", + "2nd avenue sw" + ], + "w hampton st": [ + "west hampton street", + "west hampton saint", + "w hampton street", + "w hampton saint" + ], + "swmaginnis st": [ + "swmaginnis street", + "swmaginnis saint" + ], + "9th ave nw": [ + "9th avenue northwest", + "9th avenue nw" + ], + "jefferson st": [ + "jefferson street", + "jefferson saint" + ], + "castle mtn ranch rd": [ + "castle mountain ranch road" + ], + "fourmile creek rd": [ + "fourmile creek road" + ], + "nat for dev rd 211": [ + "nat for dev road 211" + ], + "castle mountain estate rd": [ + "castle mountain estate road" + ], + "castle mnt ests rd": [ + "castle mount estates road" + ], + "apple rd": [ + "apple road" + ], + 
"forest rd": [ + "forest road" + ], + "cold springs rd": [ + "cold springs road" + ], + "spring creek rd": [ + "spring creek road" + ], + "natl forest develop road 3423 rd": [ + "national forest develop road 3423 road" + ], + "ranch creek rd": [ + "ranch creek road" + ], + "ranch creek rd s": [ + "ranch creek road south", + "ranch creek road san", + "ranch creek road s" + ], + "w houston st": [ + "west houston street", + "west houston saint", + "w houston street", + "w houston saint" + ], + "cedar rd": [ + "cedar road" + ], + "birch rd": [ + "birch road" + ], + "shearer rd": [ + "shearer road" + ], + "luppold ln": [ + "luppold laan", + "luppold lane", + "luppold line" + ], + "earling ave": [ + "earling avenue" + ], + "ryan": [ + "ryan" + ], + "miller rd": [ + "miller road" + ], + "meadow creek rd": [ + "meadow creek road" + ], + "kiff rd": [ + "kiff road" + ], + "voldseth rd": [ + "voldseth road" + ], + "n badger": [ + "north badger", + "n badger" + ], + "wall st": [ + "wall street", + "wall saint" + ], + "little moose creek trl": [ + "little moose creek trail" + ], + "5th ave nw": [ + "5th avenue northwest", + "5th avenue nw" + ], + "sw hancock st": [ + "southwest hancock street", + "southwest hancock saint", + "sw hancock street", + "sw hancock saint" + ], + "sw crawford st": [ + "southwest crawford street", + "southwest crawford saint", + "sw crawford street", + "sw crawford saint" + ], + "8th ave sw": [ + "8th avenue southwest", + "8th avenue sw" + ], + "w alabama st": [ + "west alabama street", + "west alabama saint", + "w alabama street", + "w alabama saint" + ], + "forest rd south": [ + "forest road south" + ], + "us hwy 89 n": [ + "us highway 89 north", + "us highway 89 n" + ], + "west roadway": [ + "west roadway" + ], + "stagecoach trl": [ + "stagecoach trail" + ], + "mountain rd": [ + "mountain road" + ], + "2nd ave s": [ + "2nd avenue south", + "2nd avenue san", + "2nd avenue s" + ], + "bridger view cir": [ + "bridger view circle" + ], + "lucas rd": [ + "lucas road" + ], + "canyon ranch rd": [ + "canyon ranch road" + ], + "sunrise dr": [ + "sunrise drive", + "sunrise doctor" + ], + "jackrabbit ln": [ + "jackrabbit lane", + "jackrabbit line" + ], + "sagebrush trl": [ + "sagebrush trail" + ], + "arrowhead cir": [ + "arrowhead circle" + ], + "grassy mountain rd": [ + "grassy mountain road" + ], + "pine hill dr": [ + "pine hill drive", + "pine hill doctor" + ], + "cemetery rd": [ + "cemetery road" + ], + "w south st": [ + "west south street", + "west south saint", + "w south street", + "w south saint" + ], + "badger st": [ + "badger street", + "badger saint" + ], + "e monroe st": [ + "east monroe street", + "east monroe saint", + "e monroe street", + "e monroe saint" + ], + "studhorse rd": [ + "studhorse road" + ], + "folsom st w": [ + "folsom street west", + "folsom street w", + "folsom saint west", + "folsom saint w" + ], + "w chilton st": [ + "west chilton street", + "west chilton saint", + "w chilton street", + "w chilton saint" + ], + "grove st": [ + "grove street", + "grove saint" + ], + "lind ln": [ + "lind lane", + "lind line", + "lind laan" + ], + "baker st w": [ + "baker street west", + "baker street w", + "baker saint west", + "baker saint w" + ], + "south st e": [ + "south street east", + "south street e", + "south saint east", + "south saint e" + ], + "1st st s": [ + "1st street south", + "1st street san", + "1st street s", + "1st saint south", + "1st saint san", + "1st saint s" + ], + "lake rd": [ + "lake road" + ], + "mountainview ln": [ + "mountainview lane", + 
"mountainview line" + ], + "bachler ln": [ + "bachler lane", + "bachler line", + "bachler laan" + ], + "ryan st": [ + "ryan street", + "ryan saint" + ], + "allen gulch rd": [ + "allen gulch road" + ], + "maudlow rd": [ + "maudlow road" + ], + "s east rd": [ + "southeast road" + ], + "hitching post rd": [ + "hitching post road" + ], + "hidden trl": [ + "hidden trail" + ], + "3 rd": [ + "3 road" + ], + "goat mountain rd": [ + "goat mountain road" + ], + "sage brush trl": [ + "sage brush trail" + ], + "south st": [ + "south street", + "south saint" + ], + "zehntner ln": [ + "zehntner lane", + "zehntner line" + ], + "painted pony ln": [ + "painted pony lane", + "painted pony line" + ], + "n star rd": [ + "north star road", + "n star road" + ], + "grande rd": [ + "grande road" + ], + "goat mountain f": [ + "goat mountain f", + "goat mountain front" + ], + "panorama dr": [ + "panorama drive", + "panorama doctor" + ], + "airport rd": [ + "airport road" + ], + "2nd st s": [ + "2nd street south", + "2nd street san", + "2nd street s", + "2nd saint south", + "2nd saint san", + "2nd saint s" + ], + "berg rd": [ + "berg road" + ], + "castle mt rd": [ + "castle mt road", + "castle mount road", + "castle montana road" + ], + "castle mountain rd": [ + "castle mountain road" + ], + "brown st": [ + "brown street", + "brown saint" + ], + "crawford st w": [ + "crawford street west", + "crawford street w", + "crawford saint west", + "crawford saint w" + ], + "sw10th ave": [ + "sw10th avenue", + "southwest 10th avenue", + "sw 10th avenue" + ], + "castle town rd": [ + "castle town road" + ], + "nat for dev rd 581": [ + "nat for dev road 581" + ], + "lennep rd": [ + "lennep road" + ], + "elk ridge rd": [ + "elk ridge road" + ], + "71 ranch rd": [ + "71 ranch road" + ], + "crawford st": [ + "crawford street", + "crawford saint" + ], + "fox wood ln": [ + "fox wood lane", + "fox wood line" + ], + "foxwood ln": [ + "foxwood lane", + "foxwood line" + ], + "west 26th st": [ + "west 26th street", + "west 26th saint" + ], + "": [ + "" + ] +} \ No newline at end of file diff --git a/libpostal/mock.js b/libpostal/mock.js new file mode 100644 index 00000000..229ac94e --- /dev/null +++ b/libpostal/mock.js @@ -0,0 +1,45 @@ +const _ = require('lodash'); +const fs = require('fs'); +const path = require('path'); +const fixtureFile = path.join(__dirname, './fixture/libpostal_responses.json'); +let fixtures = require(fixtureFile); + +/* When `SEED_MOCK_LIBPOSTAL is set, this library will actually + * call through to the real libpostal and record the response. + * In this way the mock responses can be kept up to date as libpostal changes */ +const SEED_MOCK_LIBPOSTAL = _.has(process, 'env.SEED_MOCK_LIBPOSTAL'); + +const expand_address = async (address) => { + + // perform some basic normalization on the address string + const normalizedAddress = address.trim().toLowerCase(); + + // return a mocked response if one is available + const mockResponse = _.get(fixtures, normalizedAddress); + if (mockResponse) { + return Promise.resolve(mockResponse); + } + + // if no mock response is available but falling back to libpostal service + // is enabled, and return the real response + else if (SEED_MOCK_LIBPOSTAL) { + const service = require('./service'); + const resp = await service.expand.expand_address(normalizedAddress); + + // write the stored list of responses after _every_ new one is added. 
this is inefficient + // but it does not appear using `process.on('exit')` is reliable + fixtures[normalizedAddress] = resp; + fs.writeFileSync(fixtureFile, JSON.stringify(fixtures, null, 2)); + + return Promise.resolve(resp); + } + + // if there is no mock response and falling back to real libpostal is disabled, + // throw an error because a human has to run libpostal and find the correct response + else { + console.error(`mock libpostal has no response for ${normalizedAddress}`); + process.exit(1); + } +}; + +module.exports.expand = { expand_address }; diff --git a/libpostal/module.js b/libpostal/module.js new file mode 100644 index 00000000..be74dca7 --- /dev/null +++ b/libpostal/module.js @@ -0,0 +1,17 @@ +let postal; + +// lazy-load node-postal only as required +const lazy = () => { + if(!postal){ postal = require('node-postal'); } + return postal; +}; + +const expand_address = async (address) => { + // return empty array immediately for empty input + if (!address) { return Promise.resolve([]); } + + const resp = lazy().expand.expand_address(address); + return Promise.resolve(resp); +}; + +module.exports.expand = { expand_address }; diff --git a/libpostal/service.js b/libpostal/service.js new file mode 100644 index 00000000..5b39f00b --- /dev/null +++ b/libpostal/service.js @@ -0,0 +1,31 @@ +const util = require('util'); +const pmw = require('pelias-microservice-wrapper'); +const LibpostalServiceConfig = require('./LibpostalServiceConfig'); +const config = require('pelias-config').generate(); + +// use the 'services.libpostal' config entry if available, otherwise fall back to 'api.services.libpostal' +const serviceConfig = config.get('services.libpostal') || config.get('api.services.libpostal'); +if (!serviceConfig) { + throw new Error('Libpostal configuration not found in `services.libpostal` or `api.services.libpostal`'); +} + +// create a service +const service = pmw.service(new LibpostalServiceConfig(serviceConfig)); + +// create an object that looks like the interface to `node-postal` but uses a remote service +module.exports = { + expand: { + expand_address: async function (address) { + + // the libpostal service will not handle an empty address + // string, so return empty array immediately + if (!address) { return Promise.resolve([]); } + + const promise = util.promisify(service); + return promise({ + endpoint: 'expand', + address + }); + } + } +}; diff --git a/package.json b/package.json index e755fce6..d54173d5 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "pbf2json": "^6.4.0", "pelias-config": "^4.0.0", "pelias-logger": "^1.2.1", + "pelias-microservice-wrapper": "^1.8.3", "quadtree": "^1.1.3", "require-dir": "^1.0.0", "serve-index": "^1.8.0", diff --git a/readme.md b/readme.md index fb8177b1..7ae0bcec 100644 --- a/readme.md +++ b/readme.md @@ -238,7 +238,6 @@ see: [source](https://github.com/pelias/interpolation/blob/master/cmd/server.js) # docker ### build docker image -This can take some time for the first build due to installing libpostal from source. ```bash docker build -t pelias/interpolation . ``` @@ -438,8 +437,6 @@ To use Interpolation service with the Pelias API, [configure the pelias config f ### install dependencies -*note:* [libpostal](https://github.com/openvenues/node-postal#troubleshooting) **must** be installed on your system before you continue! - The `Dockerfile` in this repo has complete instructions on how to install everything from scratch on Ubuntu. 
### TIGER dependency on GDAL diff --git a/test/lib/mock_libpostal.js b/test/lib/mock_libpostal.js deleted file mode 100644 index 9d064750..00000000 --- a/test/lib/mock_libpostal.js +++ /dev/null @@ -1,42 +0,0 @@ -const _ = require('lodash'); -const fs = require('fs'); - -// the real libpostal module, if needed will be loaded here -let real_libpostal; - -/* When `SEED_MOCK_LIBPOSTAL is set, this library will actually - * call through to the real libpostal and record the response. - * In this way the mock responses can be kept up to date as libpostal changes */ -const use_real_libpostal = process.env.SEED_MOCK_LIBPOSTAL !== undefined; - -// put all desired responses from libpostal here -let mock_responses = require('../../test/lib/mock_libpostal_responses'); - -module.exports.expand = { - expand_address: function(input_string) { - const clean_string = input_string.trim().toLowerCase(); - // return a mocked response if one is available - if (_.has(mock_responses, clean_string)) { - return mock_responses[clean_string]; - // if no mock response is available but falling back to real libpostal - // is enabled, lazy load real libpostal, and return the real response - } else if (use_real_libpostal) { - // lazy load libpostal only when needed - if (!real_libpostal) { real_libpostal = require('node-postal'); } - - const real_response = real_libpostal.expand.expand_address(clean_string); - mock_responses[clean_string] = real_response; - - // write the stored list of responses after _every_ new one is added. this is inefficient - // but it does not appear using `process.on('exit')` is reliable - fs.writeFileSync(__dirname +'/../../test/lib/mock_libpostal_responses.json', JSON.stringify(mock_responses, null, 2)); - - return real_response; - // if there is no mock response and falling back to real libpostal is disabled, - // throw an error because a human has to run libpostal and find the correct response - } else { - console.error(`mock libpostal has no response for ${clean_string}`); - process.exit(1); - } - } -}; From 2bd95b2b454c3d640391fff7f59f2a354fab666b Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 17 Mar 2020 17:02:32 +0100 Subject: [PATCH 7/8] feat(libpostal): add support for libpostal over http service, adapter pattern et al. --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index adee0c75..280bb08b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # base image -FROM pelias/libpostal_baseimage +FROM pelias/baseimage # dependencies RUN apt-get update && \ From dadecb7a3e444cd53f1951f5dbac20b1a0015985 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 17 Mar 2020 17:22:44 +0100 Subject: [PATCH 8/8] feat(server): add multi-core support via the cluster module --- cmd/server.js | 65 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/cmd/server.js b/cmd/server.js index ca30f962..aa7b10ee 100644 --- a/cmd/server.js +++ b/cmd/server.js @@ -1,19 +1,42 @@ +/** + The http server improves performance on multicore machines by using the + node core 'cluster' module to fork worker processes. + + The default setting is to use all available CPUs, this will spawn 32 child + processes on a 32 core machine. 
+ + If you would like to disable this feature (maybe because you are running + inside a container) then you can do so by setting the env var CPUS=1 + + You may also specify exactly how many child processes you would like to + spawn by setting the env var to a numeric value >1, eg CPUS=4 + + If the CPUS env var is set less than 1 or greater than os.cpus().length + then the default setting will be used (using all available cores). +**/ + +const os = require('os'); const express = require('express'); +const cluster = require('cluster'); const polyline = require('@mapbox/polyline'); const search = require('../api/search'); const extract = require('../api/extract'); const street = require('../api/street'); const near = require('../api/near'); const pretty = require('../lib/pretty'); -const analyze = require('../lib/analyze'); const morgan = require( 'morgan' ); const logger = require('pelias-logger').get('interpolation'); const through = require( 'through2' ); const _ = require('lodash'); -// optionally override port using env var +// select the amount of cpus we will use +const envCpus = parseInt(process.env.CPUS, 10); +const cpus = Math.min(Math.max(envCpus || Infinity, 1), os.cpus().length); + +// optionally override port/host using env var const PORT = process.env.PORT || 3000; +const HOST = process.env.HOST || undefined; // help text if( process.argv.length !== 4 ){ @@ -217,10 +240,38 @@ app.use('/demo', express.static('demo')); // app.use('/builds', express.static('/data/builds')); // app.use('/builds', directory('/data/builds', { hidden: false, icons: false, view: 'details' })); -app.listen( PORT, async function() { +// start multi-threaded server +if (cpus > 1) { + if (cluster.isMaster) { + logger.info('[master] using %d cpus', cpus); - // force loading of libpostal - await analyze.street( 'test street' ); + // worker exit event + cluster.on('exit', (worker, code, signal) => { + logger.error('[master] worker died', worker.process.pid); + }); - console.log( 'server listening on port', PORT ); -}); + // worker fork event + cluster.on('fork', (worker, code, signal) => { + logger.info('[master] worker forked', worker.process.pid); + }); + + // fork workers + for (var c = 0; c < cpus; c++) { + cluster.fork(); + } + + } else { + app.listen(PORT, HOST, () => { + logger.info('[worker %d] listening on %s:%s', process.pid, HOST || '0.0.0.0', PORT); + }); + } +} + +// start single-threaded server +else { + logger.info('[master] using %d cpus', cpus); + + app.listen(PORT, HOST, async () => { + logger.info('[master] listening on %s:%s', HOST || '0.0.0.0', PORT); + }); +}
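A quick sanity check on the CPU selection introduced above: the sketch below isolates the same `CPUS` clamping expression used in the patch (everything else omitted) and shows how it falls back to all available cores when the env var is unset or out of range. It is illustrative only, not part of the series.

```js
// Mirrors the clamp from cmd/server.js: when CPUS is unset, parseInt() yields NaN,
// `NaN || Infinity` falls through to Infinity, and Math.min() then resolves to
// os.cpus().length, i.e. all available cores. Values below 1 behave the same way.
const os = require('os');

const envCpus = parseInt(process.env.CPUS, 10);
const cpus = Math.min(Math.max(envCpus || Infinity, 1), os.cpus().length);

console.log('forking %d worker process(es)', cpus);
```

With `CPUS=1` the `cpus > 1` branch is skipped entirely and the master process listens directly, which is handy when running inside a container, as the header comment notes.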
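Stepping back to the libpostal changes in patches 5 and 6: every caller of `analyze.street()` now awaits a Promise, and the backend is chosen at require time by `libpostal/client.js` (the mock when `MOCK_LIBPOSTAL` is set, the HTTP service when `services.libpostal` is configured, otherwise the `node-postal` module). The snippet below is a hypothetical standalone usage sketch, assuming it is run from the repository root with the fixture-backed mock selected; it is not part of the patch series.

```js
// Hypothetical usage sketch: select the fixture-backed mock before requiring
// the analyzer, since libpostal/client.js picks its backend at require time.
process.env.MOCK_LIBPOSTAL = '1';

const analyze = require('./lib/analyze');

async function demo () {
  // synonym expansion plus ordinal removal, per test/lib/analyze.js:
  // expected output: [ 'west 26 street', 'west 26 saint' ]
  const expansions = await analyze.street('West 26th st');
  console.log(expansions);
}

demo().catch(console.error);
```

The same calling shape works against the HTTP service or the npm module; only the selection inside `libpostal/client.js` changes, which is the point of the adapter pattern.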