
Commit

Merge pull request #2 from lleans/main
Somehow I was able to add Google API
kitUIN authored Feb 27, 2021
2 parents d848895 + 123feea commit 7ccfbe7
Showing 6 changed files with 159 additions and 5 deletions.
1 change: 1 addition & 0 deletions PicImageSearch/__init__.py
@@ -2,6 +2,7 @@
from .tracemoe import TraceMoe
from .ascii2d import Ascii2D
from .iqdb import Iqdb
from .google import Google
__author__ = 'kitUIN'
__license__ = 'Apache-2.0 License'
__maintainer__ = 'kitUIN'
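This re-export is what lets the new class be imported from the package root, as test/test5.py below does. A minimal sketch, assuming the package is installed:

from PicImageSearch import Google  # enabled by the import added above

google = Google()  # actual searches are exercised in test/test5.py below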
114 changes: 114 additions & 0 deletions PicImageSearch/google.py
@@ -0,0 +1,114 @@
import requests
from bs4 import BeautifulSoup
from loguru import logger
from requests_toolbelt import MultipartEncoder
from urllib.parse import quote
import re


class GoogleNorm:

def __init__(self, data):
self.thumbnail: list = list()
self.titles: list = list()
self.urls: list = list()
self._arrange(data)

def _arrange(self, data):
get_data = self._getdata(data)
self.titles = get_data['titles']
self.urls = get_data['urls']
self.thumbnail = get_data['thumbnail']

def _getdata(self, datas):

data = {
'thumbnail': [],
'titles': [],
'urls': [],
}

for x in datas:
try:
origin = x.find_all('span')
data['titles'].append(origin[0].string)
url = x.find_all('a')
data['urls'].append(url[0]['href'])
img = self._gethumbnail(url)
data['thumbnail'].append(img)
            except Exception:  # skip result blocks missing the expected tags
pass

return data

@staticmethod
def _gethumbnail(data):
GOOGLEURL = "https://www.google.com/"
regex = re.compile(
r"((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\b\n|]*[^.,;:\?\!\@\^\$ -]")

thumbnail = ""

try:
for a in range(2, 5):
if re.findall('jpg|png', regex.search(data[a]['href']).group(1)):
thumbnail = regex.search(data[a]['href']).group(1)
elif re.findall('/imgres', data[a]['href']):
thumbnail = f"{GOOGLEURL}{data[a]['href']}"
        except Exception:  # fall back when the expected anchors are missing or unparsable
thumbnail = "No directable url"

return thumbnail

def __repr__(self):
return f'<NormGoogle(title={repr(self.titles)}, urls={self.urls}, thumbnail={self.thumbnail})>'


class GoogleResponse:

def __init__(self, resp):
self.origin: list = resp
self.raw: list = list()

for ele in self.origin:
detail = ele.contents
self.raw.append(GoogleNorm(detail))

def __repr__(self):
        return f'<GoogleResponse(count={len(self.origin)})>'


class Google:
GOOGLEURL = 'https://www.google.com/searchbyimage'

def __init__(self, **request_kwargs):
        self.params = dict()
self.header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
}
self.requests_kwargs = request_kwargs

@staticmethod
def _slice(res):
soup = BeautifulSoup(res, 'html.parser', from_encoding='utf-8')
resp = soup.find_all(class_='g')
return GoogleResponse(resp)

def search(self, url):
params = self.params
        if url[:4] == 'http':  # remote image: send the encoded URL as a query parameter
            urlimage_encd = quote(url, safe='')
            params['image_url'] = urlimage_encd
            response = requests.get(
                self.GOOGLEURL, params=params, headers=self.header, **self.requests_kwargs)
        else:  # local file: upload it as multipart form data
            with open(url, 'rb') as image:
                multipart = {'encoded_image': (url, image), 'image_content': ''}
                response = requests.post(
                    f"{self.GOOGLEURL}/upload", files=multipart, headers=self.header, **self.requests_kwargs)
if response.status_code == 200:
return self._slice(response.text)
else:
logger.error(response.status_code)
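For orientation, a minimal sketch (not part of the commit) of how the parsed results can be consumed. It assumes search() returned a GoogleResponse as above and, per the note in test/test5.py, that matching images start at index 2:

from loguru import logger
from PicImageSearch import Google

google = Google()
res = google.search('https://example.com/sample.png')  # hypothetical image URL
if res is not None:  # search() returns None on a non-200 response
    for norm in res.raw[2:]:  # GoogleNorm objects; earlier entries are not image matches
        if norm.titles and norm.urls:
            logger.info(f'{norm.titles[0]} -> {norm.urls[0]}')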
20 changes: 19 additions & 1 deletion PicImageSearch/iqdb.py
@@ -86,4 +86,22 @@ def search(self, url):
logger.error(e)

def search_3d(self, url):
pass
        try:
            if url[:4] == 'http':  # network URL
                datas = {
                    "url": url
                }
                res = requests.post(self.url_3d, data=datas, **self.requests_kwargs)
            else:  # local file
                with open(url, 'rb') as image:
                    m = MultipartEncoder(
                        fields={
                            'file': ('filename', image, "type=multipart/form-data")
                        }
                    )
                    headers = {'Content-Type': m.content_type}
                    urllib3.disable_warnings()
                    res = requests.post(self.url_3d, data=m, headers=headers, **self.requests_kwargs)
            if res.status_code == 200:
                return IqdbResponse(res.content)
        except Exception as e:
            logger.error(e)
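Similarly, a minimal sketch (not part of the commit) of calling the new search_3d method; it assumes Iqdb() takes no constructor arguments and that url_3d is defined elsewhere in iqdb.py, neither of which is shown in this hunk:

from loguru import logger
from PicImageSearch import Iqdb

iqdb = Iqdb()  # assumed no-argument constructor, not shown in this diff
res = iqdb.search_3d('https://example.com/photo.jpg')  # network-URL branch
if res is not None:  # errors and non-200 responses yield None
    logger.info(res)  # an IqdbResponse; its attributes are defined elsewhere in iqdb.py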
13 changes: 10 additions & 3 deletions requirements.txt
@@ -1,4 +1,11 @@
-beautifulsoup4==4.9.3
+certifi==2020.12.5
+chardet==3.0.4
+colorama==0.4.4
+idna==2.10
-loguru==0.5.3
 requests==2.23.0
+loguru~=0.5.3
+BeautifulSoup4~=4.9.3
+urllib3~=1.25.11
+requests-toolbelt==0.9.1
+soupsieve==2.2
-urllib3==1.25.11
+win32-setctime==1.0.3
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@

setuptools.setup(
name="PicImageSearch",
version="0.7.0",
version="0.8.2",
author="kitUIN",
author_email="[email protected]",
description="PicImageSearch APIs for Python 3.x 适用于 Python 3 以图搜源整合API",
14 changes: 14 additions & 0 deletions test/test5.py
@@ -0,0 +1,14 @@
from loguru import logger

from PicImageSearch import Google

google = Google()
res = google.search("https://media.discordapp.net/attachments/783138508038471701/813452582948306974/hl-18-1-900x1280.png?width=314&height=447")
#res = google.search(r'C:/kitUIN/img/tinted-good.jpg') # Search Image URL or path
logger.info(res.origin) # Original Data
logger.info(res.raw) # Raw Data
# Matching images start at index 2, so read results from there
logger.info(res.raw[2]) # <NormGoogle(title=["The Strongest Dull Prince's Secret Battle for the Throne ..."], urls=['https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/'], thumbnail=['No directable url'])>
logger.info(res.raw[2].thumbnail[0]) # No directable url
logger.info(res.raw[2].titles[0]) # The Strongest Dull Prince's Secret Battle for the Throne ...
logger.info(res.raw[2].urls[0]) # https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/
