Skip to content
This repository has been archived by the owner on Mar 5, 2022. It is now read-only.

Commit

Permalink
small adjustments and cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Johnathan Jenkins <[email protected]>
  • Loading branch information
professorjamesmoriarty committed Dec 11, 2015
1 parent 452f37c commit e8554fe
Showing 1 changed file with 71 additions and 31 deletions.
102 changes: 71 additions & 31 deletions googler
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from __future__ import print_function
import sys
import os
import termios, fcntl, struct
import termios
import fcntl
import struct
import webbrowser
from getopt import getopt, GetoptError
import readline
Expand All @@ -36,40 +38,47 @@ except ImportError:
from httplib import HTTPSConnection

# Global variables
# Terminal window size.
columns = None
# The first result to display (option -s)
start = "0"
# Number of results to display (option -n)
num = None
# Language to search for (option -l)
lang = None
# If True, opens the first URL in browser (option -j)
openUrl = False
# If True, colorizes the output (option -C)
colorize = True
# Time limit search (option -t) [e.g. h5, d5, w5, m5, y5]
duration = None
# Use a single global connection during navigation
conn = None
# For user navigation
nav = "n"
# For country-specific search
server = "www.google.com"
# Print debug logs
debug = False
# Read news
news = False

# Classes


class GoogleParser(HTMLParser.HTMLParser):

def __init__(self):
HTMLParser.HTMLParser.__init__(self)
self.handle_starttag = self.main_start
self.handle_data = self.main_data
self.handle_endtag = self.main_end
self.results = []

def main_start(self, tag, attrs):
    """Enter the result-item state when ``<li class="g">`` opens.

    Only matches when ``("class", "g")`` is the first attribute of the
    tag. On a match the per-result fields are reset and the ``li_*``
    handlers are installed; any other tag is ignored.
    """
    if tag != "li" or not attrs or attrs[0] != ("class", "g"):
        return

    # Fresh accumulators for the result that is about to be parsed.
    self.title = ""
    self.url = ""
    self.text = ""

    self.handle_starttag = self.li_start
    self.handle_data = self.li_data
    self.handle_endtag = self.li_end

def main_data(self, data):
    """Discard character data seen while in the top-level ("main") state."""

def main_end(self, tag):
    """Ignore closing tags seen while in the top-level ("main") state."""
# <li class="g"> ... </li>

def li_start(self, tag, attrs):
if tag == "h3":
self.handle_starttag = self.h3_start
Expand All @@ -79,8 +88,10 @@ class GoogleParser(HTMLParser.HTMLParser):
self.handle_starttag = self.div_start
self.handle_data = self.div_data
self.handle_endtag = self.div_end

def li_data(self, data):
    """Discard character data that sits directly inside a result item."""

def li_end(self, tag):
if tag == "div":
marker = self.url.find("?q=")
Expand All @@ -92,35 +103,45 @@ class GoogleParser(HTMLParser.HTMLParser):

if self.url != "":
index = len(self.results) + 1
self.results.append(Result(index, self.title, unquote(self.url), self.text))
self.results.append(Result(index, self.title,
unquote(self.url), self.text))
self.handle_starttag = self.main_start
self.handle_data = self.main_data
self.handle_endtag = self.main_end
# <h3> ... </h3>

def h3_start(self, tag, attrs):
    """Record the link target of the ``<a>`` found inside a result's ``<h3>``.

    ``attrs`` is the list of ``(name, value)`` pairs supplied by
    HTMLParser. The ``href`` attribute is looked up by name instead of
    assuming it is the first attribute, so an anchor with no attributes
    (or with another attribute first) no longer raises IndexError or
    stores the wrong value. When no usable ``href`` is present,
    ``self.url`` is left unchanged.
    """
    if tag != "a":
        return
    for name, value in attrs:
        # Skip a value-less href (value is None) as well as an empty one.
        if name == "href" and value:
            self.url = value
            break

def h3_data(self, data):
    """Append a chunk of heading text to the title of the current result."""
    self.title = self.title + data

def h3_end(self, tag):
    """Switch back to the result-item handlers when ``</h3>`` closes.

    Closing tags other than ``h3`` (for example the inner ``</a>``)
    leave the current handlers in place.
    """
    if tag != "h3":
        return
    self.handle_starttag, self.handle_data, self.handle_endtag = (
        self.li_start, self.li_data, self.li_end)
# <div> ... </div>

def div_start(self, tag, attrs):
    """Enter the snippet state when ``<span class="st">`` opens.

    Only matches when ``("class", "st")`` is the first attribute of the
    tag; every other opening tag is ignored.
    """
    opens_snippet = (tag == "span" and bool(attrs)
                     and attrs[0] == ("class", "st"))
    if opens_snippet:
        self.handle_starttag = self.span_start
        self.handle_data = self.span_data
        self.handle_endtag = self.span_end

def div_data(self, data):
    """Discard character data inside the result ``<div>`` but outside the snippet span."""

def div_end(self, tag):
    """Ignore closing tags while in the ``<div>`` state."""

def span_start(self, tag, start):
    """Ignore tags opened while collecting the snippet text."""

def span_data(self, data):
    """Append a chunk of snippet text to the current result's description."""
    self.text = self.text + data

def span_end(self, tag):
if tag == "span":
self.handle_starttag = self.li_start
Expand All @@ -129,11 +150,13 @@ class GoogleParser(HTMLParser.HTMLParser):


class Result:

def __init__(self, index, title, url, text):
self.index = index
self.title = title
self.url = url
self.text = text

def print_entry(self):
index = self.index
title = self.title
Expand Down Expand Up @@ -170,7 +193,7 @@ class Result:
col += len(w) + 1
print("\n")
else:
print("%s\n" % text.replace("\n"," "))
print("%s\n" % text.replace("\n", " "))

def open(self):
_stderr = os.dup(2)
Expand All @@ -188,28 +211,34 @@ class Result:
os.dup2(_stdout, 1)

# Functions


def is_int(string):
    """Return True if ``int(string)`` succeeds, False otherwise.

    Only the errors that ``int()`` raises for unconvertible input are
    treated as "not an integer". A bare ``except`` would also swallow
    KeyboardInterrupt and SystemExit, hiding a Ctrl-C pressed at the
    prompt.
    """
    try:
        int(string)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


def usage():
print("Usage: googler [OPTIONS] KEYWORDS...")
print("Performs a Google search and prints the results to stdout.\n")
print("Options")
print(" -s N start at the Nth result")
print(" -n N show N results (default 10)")
print(" -N show results from news section")
print(" -c SERV country-specific search (refer man or project page for details)")
print(" -c SERV country-specific search (refer man or project page for" +
" details)")
print(" -l LANG display in language LANG, such as fi for Finnish")
print(" -C disable color output")
print(" -j open the first result in a web browser")
print(" -t dN time limit search [h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)]")
print(" -t dN time limit search [h5 (5 hrs), d5 (5 days), " +
"w5 (5 weeks), m5 (5 months), y5 (5 years)]")
print(" -d enable debugging\n")
print("Keys")
print(" g terms enter 'g' followed by keywords to initiate a new search (with original options)")
print(" g terms enter 'g' followed by keywords to initiate a new " +
"search (with original options)")
print(" n, p enter 'n' or 'p' to navigate forward or backward")
print(" 1-N enter a number to open that result in browser")
print(" any other input exits googler\n")
Expand All @@ -219,18 +248,22 @@ def usage():
print("Webpage: https://github.com/jarun/google-cli")
sys.exit(1)


def serverURL(domain):
    """Map a two-letter country code to the Google host name to query.

    Known codes fall into one of three host layouts
    (``www.google.co.XX``, ``www.google.XX``, ``www.google.com.XX``).
    Any other value yields the default ``www.google.com``.
    """
    # Google domain ref: https://en.wikipedia.org/wiki/List_of_Google_domains
    layouts = (
        # www.google.co.domain
        ("www.google.co.", ("id", "in", "jp", "kr", "uk")),
        # www.google.domain
        ("www.google.", ("be", "ca", "ch", "cz", "de", "es", "fi", "fr",
                         "it", "nl", "pl", "pt", "ro", "ru", "se")),
        # www.google.com.domain
        ("www.google.com.", ("ar", "au", "br", "mx", "ph", "tw", "ua")),
    )
    for prefix, codes in layouts:
        if domain in codes:
            return prefix + domain

    return "www.google.com"

########### Program Main
# Program Main

# Process command line options.
optlist = None
Expand Down Expand Up @@ -272,7 +305,7 @@ try:
elif opt[0] == "-t":
# Option -t dN
duration = opt[1]
if not opt[1][0] in ("h", "d","w","m","y",):
if not opt[1][0] in ("h", "d", "w", "m", "y",):
usage()
sys.exit(1)
if not opt[1][1].isdigit():
Expand Down Expand Up @@ -323,6 +356,7 @@ if columns <= 0:
# Connect to Google and request the result page.
conn = HTTPSConnection(server, timeout=45)


def fetch_results():
global conn
global url
Expand All @@ -339,7 +373,7 @@ def fetch_results():
resp = conn.getresponse()

if resp.status != 200:
if resp.status in (301,302,):
if resp.status in (301, 302,):
url = urljoin(url, resp.getheader('location', ''))
if debug:
print("[DEBUG] Redirected URL [%s]" % url)
Expand All @@ -349,8 +383,10 @@ def fetch_results():
sys.exit(1)
conn.close()
if debug:
print("[DEBUG] Next Server [%s]" % url[url.find("//") + 2:url.find("/search")])
conn = HTTPSConnection(url[url.find("//") + 2:url.find("/search")], timeout=45)
print("[DEBUG] Next Server [%s]" % url[url.find("//") +
2:url.find("/search")])
conn = HTTPSConnection(url[url.find("//") + 2:url.find("/search")],
timeout=45)
url = url[url.find("/search"):]
if debug:
print("[DEBUG] Next GET [%s]\n" % url)
Expand All @@ -365,7 +401,8 @@ def fetch_results():

if resp.status != 200:
# Failed connecting to redirected server too!
print("ERROR after 1st redirection:", str(resp.status), ": ", resp.reason)
print("ERROR after 1st redirection:", str(resp.status), ": ",
resp.reason)
conn.close()
sys.exit(1)
else:
Expand All @@ -389,7 +426,7 @@ def fetch_results():

results = []
while True:
if nav == "n" or nav == "p" or nav =="g":
if nav == "n" or nav == "p" or nav == "g":
results = fetch_results()

oldstart = start
Expand All @@ -403,13 +440,15 @@ while True:
start = str(int(start) + int(num))
else:
start = str(int(start) + 10)
print("\n\x1B[91m\x1B[1m ***** ***** ***** ***** \x1B[0m\n")
print("\n\x1B[91m\x1B[1m ***** ***** ***** *****\
\x1B[0m\n")
elif nav == "p":
if num is not None:
start = str(int(start) - int(num))
else:
start = str(int(start) - 10)
print("\n\x1B[91m\x1B[1m ***** ***** ***** ***** \x1B[0m\n")
print("\n\x1B[91m\x1B[1m ***** ***** ***** *****\
\x1B[0m\n")
elif len(nav) > 2 and nav[0] == "g" and nav[1] == " ":
trimsearch = nav[2:].strip().replace(" ", "+")
if trimsearch == "":
Expand All @@ -420,7 +459,8 @@ while True:
print("New search URL [%s]" % url)
nav = "g"
start = basestart
print("\n\x1B[91m\x1B[1m ***** ***** ***** ***** \x1B[0m\n")
print("\n\x1B[91m\x1B[1m ***** ***** ***** *****\
\x1B[0m\n")
continue
elif is_int(nav):
index = int(nav) - 1
Expand Down

0 comments on commit e8554fe

Please sign in to comment.