forked from 43South/KingboroughCouncil
-
Notifications
You must be signed in to change notification settings - Fork 4
/
scraper.php
38 lines (30 loc) · 1.69 KB
/
scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
<?php
require_once 'vendor/autoload.php';
require_once 'vendor/openaustralia/scraperwiki/scraperwiki.php';
use PGuardiario\PGBrowser;
use Torann\DomParser\HtmlDom;
# Scrape the Kingborough Council planning-notices page and save one record per
# advertised application through scraperwiki::save(), keyed on council_reference.

# Notice dates are interpreted, and date_scraped is produced, in Hobart local time.
date_default_timezone_set('Australia/Hobart');

$url_base = 'https://www.kingborough.tas.gov.au/development/planning-notices/';
# NOTE(review): '[email protected]' is not a valid address - it looks like an
# e-mail obfuscation placeholder that ended up in the source. Confirm the
# council's real contact address and restore it here.
$comment_url = 'mailto:[email protected]';

$browser = new PGBrowser();
$page = $browser->get($url_base);
$dom = HtmlDom::fromString($page->html);

# The notices live in the second child of the first "table.table" element.
# Fail loudly with a readable message if the layout has changed, instead of
# dying on a "call to a member function on null" error.
# (Assumes find()/children() return a falsy value when nothing matches - confirm
# against the installed HtmlDom version.)
$table = $dom->find("table.table", 0);
$rows_container = $table ? $table->children(1) : null;
if (!$rows_container) {
    throw new RuntimeException('Planning notices table not found at ' . $url_base);
}

foreach ($rows_container->find('tr') as $tr) {
    $link             = $tr->find("a", 0);
    $address_cell     = $tr->find("td", 0);
    $from_cell        = $tr->find("td", 1);
    $to_cell          = $tr->find("td", 2);
    $description_cell = $tr->find("td", 3);

    # Rows without a document link or without all four cells (e.g. a header or
    # a "nothing advertised" placeholder) cannot produce a record.
    if (!$link || !$address_cell || !$from_cell || !$to_cell || !$description_cell) {
        print ("Skipping row without a document link and four cells\n");
        continue;
    }

    # The council reference is the first three dash-separated fields of the
    # linked document's file name (the text after the last "/").
    $path_segments = explode("/", $link->href);
    $file_name = end($path_segments);
    $reference_fields = explode("-", $file_name);
    if (count($reference_fields) < 3) {
        print ("Skipping row, cannot derive council reference from '" . $file_name . "'\n");
        continue;
    }
    $council_reference = implode('-', array_slice($reference_fields, 0, 3));

    # strtotime() returns false for text it cannot parse; date() would then turn
    # that into 1970-01-01, so reject the row rather than store a bogus date.
    # NOTE(review): strtotime() reads slash-separated dates as m/d/y - confirm
    # the page does not publish d/m/y dates with slashes.
    $notice_from = strtotime(trim($from_cell->plaintext));
    $notice_to   = strtotime(trim($to_cell->plaintext));
    if ($notice_from === false || $notice_to === false) {
        print ("Skipping " . $council_reference . ", unparseable notice dates\n");
        continue;
    }

    # Put all information in an array
    $record = [
        'council_reference' => $council_reference,
        'address'           => trim(htmlspecialchars_decode($address_cell->plaintext)) . ', Tasmania',
        'description'       => trim(htmlspecialchars_decode($description_cell->plaintext)),
        'info_url'          => $url_base,
        'comment_url'       => $comment_url,
        'date_scraped'      => date('Y-m-d'),
        'on_notice_from'    => date('Y-m-d', $notice_from),
        'on_notice_to'      => date('Y-m-d', $notice_to)
    ];

    print ("Saving record " . $record['council_reference'] . " - " . $record['address'] ."\n");
    scraperwiki::save(['council_reference'], $record);
}