-
Notifications
You must be signed in to change notification settings - Fork 2
/
scraper.rb
31 lines (26 loc) · 1 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
require 'scraperwiki'
require 'mechanize'
# HTTP client; used at the bottom of the script to download the listing page.
agent = Mechanize.new
# Reads every application row from the first <tbody> of +page+ and saves each
# one that is not already present in the ScraperWiki `data` table.
#
# Each row is expected to hold, in order: council reference, address and
# description. Rows with fewer than three <td> cells (e.g. header rows) or
# with no usable reference are skipped instead of raising.
#
# @param page [#at] parsed listing page (a Mechanize page in this script)
# @param url [String] address the page was fetched from; stored as both the
#   info URL and the comment URL of every record
# @return [void]
def scrape_page(page, url)
  table = page.at("tbody")
  if table.nil?
    # No table body at all: report it rather than crash with NoMethodError.
    puts "No application table found at #{url}"
    return
  end

  # Same value for every row, so compute it once.
  date_scraped = Date.today.to_s

  table.search("tr").each do |tr|
    cells = tr.search("td")
    next if cells.length < 3

    # Text before the first "(" is the reference; kept verbatim (not stripped)
    # so keys stay identical to records saved by earlier runs.
    reference = cells[0].inner_text.split("(")[0]
    next if reference.nil? || reference.strip.empty?

    record = {
      "info_url" => url,
      "comment_url" => url,
      "council_reference" => reference,
      "address" => cells[1].inner_text + ", VIC",
      "description" => cells[2].inner_text,
      "date_scraped" => date_scraped
    }

    # The reference comes from the scraped page, so bind it as a parameter
    # instead of splicing it into the SQL text.
    already_saved =
      begin
        !ScraperWiki.select("* from data where `council_reference`=?", [reference]).empty?
      rescue StandardError
        # Best effort, as before: if the lookup fails (presumably because the
        # `data` table does not exist yet on a first run) treat the record as new.
        false
      end

    if already_saved
      puts "Skipping already saved record " + reference
    else
      ScraperWiki.save_sqlite(['council_reference'], record)
    end
  end
end
# Script entry point: download the council's list of planning permit
# applications currently on exhibition and hand the page to scrape_page.
listing_url = "http://www.bawbawshire.vic.gov.au/Building-and-Planning/Planning/Current-Applications-on-Exhibition/List-of-Current-Planning-Permit-Applications-on-Exhibition"
listing_page = agent.get(listing_url)

# Printed only once the download has succeeded.
puts "Scraping page..."
scrape_page(listing_page, listing_url)