forked from LoveMyData/wollondilly
-
Notifications
You must be signed in to change notification settings - Fork 2
/
scraper.rb
49 lines (41 loc) · 1.29 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
require "mechanize"
require "json"
require "scraperwiki"
# Parse a "DD/MM/YYYY" date string as returned by the council API.
# Returns nil when the field is blank (whitespace-only), otherwise a Date.
# Raises ArgumentError (from Date.strptime) on malformed non-blank input.
def parse_date(s)
  return nil if s.strip.empty?

  Date.strptime(s, "%d/%m/%Y")
end
# Fetch development applications from Wollondilly Shire Council's DA
# tracking API and save any received in the last 30 days via ScraperWiki.
root_url = "https://tracking.wollondilly.nsw.gov.au"
url = "#{root_url}/api/app"

agent = Mechanize.new
proxy = ENV["MORPH_AUSTRALIAN_PROXY"]
if proxy
  # Route traffic through an Australian proxy when configured on morph.io.
  # Set MORPH_AUSTRALIAN_PROXY to http://morph:[email protected]:8888
  # with the real password substituted in.
  puts "Using Australian proxy..."
  agent.agent.set_proxy(proxy)
end

applications = JSON.parse(agent.get(url).body)
cutoff = Date.today - 30

applications.each do |app|
  received = parse_date(app["rec_dte"])
  # Skip applications with no received date or older than 30 days.
  next unless received && received >= cutoff

  record = {
    "council_reference" => app["fmt_acc2"],
    "address" => app["prm_adr"] + ", NSW",
    "description" => app["precis"].strip,
    "info_url" => "#{root_url}/detail/#{app['fmt_acc']}",
    "date_scraped" => Date.today.to_s,
    "date_received" => received.to_s,
    "on_notice_from" => parse_date(app["not_opn_dte"]),
    "on_notice_to" => parse_date(app["not_clo_dte"]),
    "lat" => app["lat"],
    # NOTE(review): API field is "lon"; stored under "lng" per the
    # planningalerts schema.
    "lng" => app["lon"]
  }
  puts "Storing #{record['council_reference']} - #{record['address']}"
  ScraperWiki.save_sqlite(["council_reference"], record)
end