Skip to content
This repository has been archived by the owner on Mar 5, 2022. It is now read-only.

Commit

Permalink
Google replaced "li" with "div" as the search result separator.
Browse files Browse the repository at this point in the history
Signed-off-by: Arun Prakash Jana <[email protected]>
  • Loading branch information
jarun committed Jan 9, 2016
1 parent 77ca3f3 commit 77df5a1
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions googler
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ class GoogleParser(HTMLParser.HTMLParser):
self.results = []

def main_start(self, tag, attrs):
if tag == "li" and len(attrs) > 0 and attrs[0] == ("class", "g"):
if tag == "div" and len(attrs) > 0 and attrs[0] == ("class", "g"):
self.title = ""
self.url = ""
self.text = ""
self.handle_starttag = self.li_start
self.handle_data = self.li_data
self.handle_endtag = self.li_end
self.handle_starttag = self.div_outer_start
self.handle_data = self.div_outer_data
self.handle_endtag = self.div_outer_end

def main_data(self, data):
pass
Expand All @@ -79,20 +79,20 @@ class GoogleParser(HTMLParser.HTMLParser):
pass
# <div class="g"> ... </div>

def li_start(self, tag, attrs):
def div_outer_start(self, tag, attrs):
if tag == "h3":
self.handle_starttag = self.h3_start
self.handle_data = self.h3_data
self.handle_endtag = self.h3_end
elif tag == "div" and len(attrs) > 0 and attrs[0] == ("class", "s"):
self.handle_starttag = self.div_start
self.handle_data = self.div_data
self.handle_endtag = self.div_end
self.handle_starttag = self.div_inner_start
self.handle_data = self.div_inner_data
self.handle_endtag = self.div_inner_end

def li_data(self, data):
def div_outer_data(self, data):
pass

def li_end(self, tag):
def div_outer_end(self, tag):
if tag == "div":
marker = self.url.find("?q=")
if marker >= 0:
Expand All @@ -119,21 +119,21 @@ class GoogleParser(HTMLParser.HTMLParser):

def h3_end(self, tag):
if tag == "h3":
self.handle_starttag = self.li_start
self.handle_data = self.li_data
self.handle_endtag = self.li_end
self.handle_starttag = self.div_outer_start
self.handle_data = self.div_outer_data
self.handle_endtag = self.div_outer_end
# <div class="s"> ... </div>

def div_start(self, tag, attrs):
def div_inner_start(self, tag, attrs):
if tag == "span" and len(attrs) > 0 and attrs[0] == ("class", "st"):
self.handle_starttag = self.span_start
self.handle_data = self.span_data
self.handle_endtag = self.span_end

def div_data(self, data):
def div_inner_data(self, data):
pass

def div_end(self, tag):
def div_inner_end(self, tag):
pass

def span_start(self, tag, start):
Expand All @@ -144,9 +144,9 @@ class GoogleParser(HTMLParser.HTMLParser):

def span_end(self, tag):
if tag == "span":
self.handle_starttag = self.li_start
self.handle_data = self.li_data
self.handle_endtag = self.li_end
self.handle_starttag = self.div_outer_start
self.handle_data = self.div_outer_data
self.handle_endtag = self.div_outer_end


class Result:
Expand Down

0 comments on commit 77df5a1

Please sign in to comment.