forked from 43South/KingboroughCouncil
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added comment_url and documents fields
- Loading branch information
Showing
4 changed files
with
230 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.insertData = exports.fieldNames = void 0; | ||
// Initialise our database | ||
const sqlite3_1 = __importDefault(require("sqlite3")); | ||
/** The field names in the SQL database. | ||
* Use this for consistent ordering of fields in queries. | ||
*/ | ||
exports.fieldNames = [ | ||
"council_reference", | ||
"address", | ||
"description", | ||
"info_url", | ||
"comment_url", | ||
"date_scraped", | ||
"on_notice_from", | ||
"on_notice_to", | ||
"documents", | ||
]; | ||
// Open (or create) the SQLite file and make sure the "data" table exists.
const db = new sqlite3_1.default.Database("data.sqlite");
db.serialize(() => {
    // Every column is TEXT; the first field name doubles as the primary key.
    const columnDefs = [];
    exports.fieldNames.forEach((name, index) => {
        const suffix = index === 0 ? " PRIMARY KEY" : "";
        columnDefs.push(`${name} TEXT${suffix}`);
    });
    const createQuery = `CREATE TABLE IF NOT EXISTS data (${columnDefs.join(", ")})`;
    // Log the statement, then create the table if it is not there yet
    console.log(`createQuery:`, createQuery);
    db.run(createQuery);
});
// Add every column listed in fieldNames that an existing table is missing.
// A database created by an earlier version of the scraper lacks both
// "comment_url" and "documents"; adding only "documents" would leave the
// INSERT in insertData failing on the missing "comment_url" column.
db.serialize(function () {
    // PRAGMA table_info returns one row per existing column
    const checkQuery = `PRAGMA table_info(data)`;
    db.all(checkQuery, function (err, rows) {
        if (err) {
            console.error(err.message);
            return;
        }
        const existingColumns = new Set(rows.map((r) => r.name));
        // Column names come from the constant fieldNames list, never from
        // external input, so interpolating them into the statement is safe.
        exports.fieldNames
            .filter((column) => !existingColumns.has(column))
            .forEach((column) => {
                db.run(`ALTER TABLE data ADD COLUMN ${column} TEXT`, function (err) {
                    if (err) {
                        console.error(err.message);
                        return;
                    }
                    console.log(`Column "${column}" added to the table "data"`);
                });
            });
    });
});
/**
 * Insert the given records into the "data" table, replacing any existing row
 * that has the same primary key.
 *
 * @param data Array of records keyed by the names in fieldNames.
 */
function insertData(data) {
    db.serialize(function () {
        const insertFields = exports.fieldNames.join(", ");
        const placeholders = exports.fieldNames.map(() => "?").join(", ");
        /** Morph.io appears to persist the database across scraper runs.
         * This should be enough to insert new DAs, update DAs when they change,
         * and keep their data when they are removed from the website.
         */
        const insertQuery = `INSERT OR REPLACE INTO data (${insertFields}) VALUES (${placeholders})`;
        console.log(`insertQuery:`, insertQuery);
        /** Insert new records */
        const statement = db.prepare(insertQuery, (err) => {
            if (err)
                console.error(err.message);
        });
        data.forEach((record) => {
            // Bind values in fieldNames order so they always line up with the
            // column list above, however many fields there are.
            const values = exports.fieldNames.map((f) => record[f]);
            statement.run(values, (err) => {
                if (err)
                    console.error(err.message);
            });
        });
        statement.finalize();
        console.log("Inserted/updated", data.length, "records");
    });
}
exports.insertData = insertData; | ||
exports.default = db; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,31 +30,20 @@ const request_promise_1 = __importDefault(require("request-promise")); | |
const cheerio = __importStar(require("cheerio")); | ||
const sqlite3_1 = __importDefault(require("sqlite3")); | ||
const luxon_1 = require("luxon"); | ||
/** The field names in the SQL database. | ||
* Use this for consistent ordering of fields in queries. | ||
*/ | ||
const fieldNames = [ | ||
'council_reference', | ||
'address', | ||
'description', | ||
'info_url', | ||
'date_scraped', | ||
'on_notice_from', | ||
'on_notice_to', | ||
// 'more_info', | ||
]; | ||
const options = { | ||
uri: 'https://www.kingborough.tas.gov.au/development/planning-notices/', | ||
transform: (body) => cheerio.load(body), | ||
}; | ||
const db_1 = require("./db"); | ||
sqlite3_1.default.verbose(); | ||
const info_url = "https://www.kingborough.tas.gov.au/development/planning-notices/"; | ||
const comment_url = "mailto:[email protected]"; | ||
(async () => { | ||
const $ = await (0, request_promise_1.default)(options); | ||
const $ = await (0, request_promise_1.default)({ | ||
uri: "https://www.kingborough.tas.gov.au/development/planning-notices/", | ||
transform: (body) => cheerio.load(body), | ||
}); | ||
/** Table rows parsed in to the database fields */ | ||
const data = $('#list tbody tr') | ||
const data = $("#list tbody tr") | ||
.toArray() | ||
.map((el) => { | ||
const cells = $(el).find('td'); | ||
const cells = $(el).find("td"); | ||
/** The first 5 fields are just simple strings */ | ||
const strings = cells | ||
.toArray() | ||
|
@@ -66,70 +55,30 @@ sqlite3_1.default.verbose(); | |
* - Environmental Impact Assessments | ||
* - etc | ||
*/ | ||
const more_info = $(el) | ||
.find('a') | ||
const documents = $(el) | ||
.find("a") | ||
.toArray() | ||
.map((el) => $(el).attr('href')) | ||
.map((el) => $(el).attr("href")) | ||
.filter((s) => !!s); | ||
/** First link is the primary Development Application document */ | ||
const info_url = more_info.shift() || ''; | ||
/** Assign the string fields to variables */ | ||
const [council_reference, address, on_notice_from, on_notice_to, description,] = strings; | ||
return { | ||
council_reference, | ||
address: `${address}, Tasmania`, | ||
description, | ||
info_url, | ||
date_scraped: new Date().toISOString(), | ||
comment_url, | ||
date_scraped: luxon_1.DateTime.now().toISODate(), | ||
on_notice_from: | ||
/** Convert the date strings from localised version to ISO */ | ||
luxon_1.DateTime.fromFormat(on_notice_from, 'd MMM yyyy').toISODate() || '', | ||
on_notice_to: luxon_1.DateTime.fromFormat(on_notice_to, 'd MMM yyyy').toISODate() || '', | ||
luxon_1.DateTime.fromFormat(on_notice_from, "d MMM yyyy").toISODate() || "", | ||
on_notice_to: luxon_1.DateTime.fromFormat(on_notice_to, "d MMM yyyy").toISODate() || "", | ||
/** Dump the additional PDF links in to this extra variable | ||
* and figure out what to do with them later 🤷♂️. | ||
* morph.io API could be used to access this and download files */ | ||
more_info: JSON.stringify(more_info), | ||
documents: JSON.stringify(documents), | ||
}; | ||
}); | ||
console.log(data); | ||
// Open a database handle | ||
var db = new sqlite3_1.default.Database('data.sqlite'); | ||
const createFields = fieldNames | ||
.map((f, i) => { | ||
if (i === 0) | ||
return `${f} TEXT PRIMARY KEY`; | ||
return `${f} TEXT`; | ||
}) | ||
.join(', '); | ||
const createQuery = `CREATE TABLE IF NOT EXISTS data (${createFields})`; | ||
const insertFields = fieldNames.join(', '); | ||
/** Morph.io appears to persist the database across scraper runs. | ||
* This should be enough to insert new DAs, update DAs when they change, | ||
* and keep their data when they are removed from the website. | ||
*/ | ||
const insertQuery = `INSERT OR REPLACE INTO data (${insertFields}) VALUES (${fieldNames | ||
.map(() => '?') | ||
.join(', ')})`; | ||
db.serialize(function () { | ||
//Create new table | ||
console.log(`createQuery:`, createQuery); | ||
db.run(createQuery); | ||
// TODO: Figure out how to add a column if it doesn't exist | ||
// db.get('SELECT more_info from data', (error) => { | ||
// console.log('error', error); | ||
// if (error) { | ||
// db.run('ALTER TABLE data ADD COLUMN more_info TEXT'); | ||
// } | ||
// }); | ||
console.log(`insertQuery:`, insertQuery); | ||
/** Insert new records */ | ||
var statement = db.prepare(insertQuery); | ||
data.forEach((record) => { | ||
statement.run(record[fieldNames[0]], record[fieldNames[1]], record[fieldNames[2]], record[fieldNames[3]], record[fieldNames[4]], record[fieldNames[5]], record[fieldNames[6]] | ||
// record[fieldNames[7]] | ||
); | ||
}); | ||
statement.finalize(); | ||
console.log('Inserted/updated', data.length, 'records'); | ||
}); | ||
(0, db_1.insertData)(data); | ||
})(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
// Initialise our database | ||
import sqlite3 from "sqlite3"; | ||
|
||
/** The field names in the SQL database.
 * Use this for consistent ordering of fields in queries.
 * This list is the single source of truth for the column names.
 */
export const fieldNames = [
  "council_reference",
  "address",
  "description",
  "info_url",
  "comment_url",
  "date_scraped",
  "on_notice_from",
  "on_notice_to",
  "documents",
] as const;

/** Union of the column names, derived from `fieldNames` so the type and the
 * runtime list cannot drift apart.
 */
export type FieldNames = (typeof fieldNames)[number];
|
||
export type Document = Record<FieldNames, string>; | ||
|
||
// Open (or create) the SQLite file and make sure the "data" table exists.
const db = new sqlite3.Database("data.sqlite");
db.serialize(() => {
  // Every column is TEXT; the first field name doubles as the primary key.
  const columnDefs: string[] = [];
  fieldNames.forEach((name, index) => {
    const suffix = index === 0 ? " PRIMARY KEY" : "";
    columnDefs.push(`${name} TEXT${suffix}`);
  });
  const createQuery = `CREATE TABLE IF NOT EXISTS data (${columnDefs.join(", ")})`;
  // Log the statement, then create the table if it is not there yet
  console.log(`createQuery:`, createQuery);
  db.run(createQuery);
});
|
||
// Add every column listed in fieldNames that an existing table is missing.
// A database created by an earlier version of the scraper lacks both
// "comment_url" and "documents"; adding only "documents" would leave the
// INSERT in insertData failing on the missing "comment_url" column.
db.serialize(function () {
  // PRAGMA table_info returns one row per existing column
  const checkQuery = `PRAGMA table_info(data)`;
  db.all(checkQuery, function (err, rows: { name: string }[]) {
    if (err) {
      console.error(err.message);
      return;
    }

    const existingColumns = new Set(rows.map((r) => r.name));

    // Column names come from the constant fieldNames list, never from
    // external input, so interpolating them into the statement is safe.
    fieldNames
      .filter((column) => !existingColumns.has(column))
      .forEach((column) => {
        db.run(`ALTER TABLE data ADD COLUMN ${column} TEXT`, function (err) {
          if (err) {
            console.error(err.message);
            return;
          }
          console.log(`Column "${column}" added to the table "data"`);
        });
      });
  });
});
|
||
/**
 * Insert the given records into the "data" table, replacing any existing row
 * that has the same primary key.
 *
 * @param data Records keyed by the names in `fieldNames`.
 */
export function insertData(data: Document[]): void {
  db.serialize(function () {
    const insertFields = fieldNames.join(", ");
    const placeholders = fieldNames.map(() => "?").join(", ");
    /** Morph.io appears to persist the database across scraper runs.
     * This should be enough to insert new DAs, update DAs when they change,
     * and keep their data when they are removed from the website.
     */
    const insertQuery = `INSERT OR REPLACE INTO data (${insertFields}) VALUES (${placeholders})`;

    console.log(`insertQuery:`, insertQuery);

    /** Insert new records */
    const statement = db.prepare(insertQuery, (err: Error | null) => {
      if (err) console.error(err.message);
    });
    data.forEach((record) => {
      // Bind values in fieldNames order so they always line up with the
      // column list above, however many fields there are.
      const values = fieldNames.map((f) => record[f]);
      statement.run(values, (err: Error | null) => {
        if (err) console.error(err.message);
      });
    });
    statement.finalize();
    console.log("Inserted/updated", data.length, "records");
  });
}
|
||
export default db; |
Oops, something went wrong.