-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from lleans/main
Somehow I was able to add the Google API
- Loading branch information
Showing
6 changed files
with
159 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import requests | ||
from bs4 import BeautifulSoup | ||
from loguru import logger | ||
from requests_toolbelt import MultipartEncoder | ||
from urllib.parse import quote | ||
import re | ||
|
||
|
||
class GoogleNorm:
    """Titles, URLs and thumbnails extracted from the child nodes of one result.

    ``titles``, ``urls`` and ``thumbnail`` are parallel lists: index ``i`` of
    each list describes the same child node.
    """

    # Base against which relative ``/imgres`` links are resolved.
    _GOOGLE_BASE = "https://www.google.com/"
    # Placeholder returned when no usable thumbnail link is found.
    _NO_THUMBNAIL = "No directable url"
    # Matches an absolute http(s) URL; group 1 is the part used as the link.
    # Compiled once here instead of on every ``_gethumbnail`` call.
    _URL_PATTERN = re.compile(
        r"((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\b\n|]*[^.,;:\?\!\@\^\$ -]")

    def __init__(self, data):
        """Build the three parallel lists from ``data``.

        Args:
            data: Iterable of nodes; each usable node exposes ``find_all``.
        """
        self.thumbnail: list = list()
        self.titles: list = list()
        self.urls: list = list()
        self._arrange(data)

    def _arrange(self, data):
        """Run the extraction and copy its result onto the instance."""
        get_data = self._getdata(data)
        self.titles = get_data['titles']
        self.urls = get_data['urls']
        self.thumbnail = get_data['thumbnail']

    def _getdata(self, datas):
        """Collect title, URL and thumbnail for every usable node.

        A node is usable when it has at least one ``span`` and at least one
        ``a`` carrying an ``href``. Any other node (for example a plain text
        node without ``find_all``) is skipped as a whole, so the three lists
        always stay the same length.

        Args:
            datas: Iterable of nodes to inspect.

        Returns:
            Dict with the keys ``'thumbnail'``, ``'titles'`` and ``'urls'``,
            each mapping to a list.
        """
        data = {
            'thumbnail': [],
            'titles': [],
            'urls': [],
        }

        for node in datas:
            # Resolve everything first and append afterwards: a node that
            # fails half-way must not leave a dangling title behind.
            try:
                title = node.find_all('span')[0].string
                anchors = node.find_all('a')
                href = anchors[0]['href']
            except (AttributeError, IndexError, KeyError, TypeError):
                continue

            data['titles'].append(title)
            data['urls'].append(href)
            data['thumbnail'].append(self._gethumbnail(anchors))

        return data

    @staticmethod
    def _gethumbnail(data):
        """Pick a thumbnail link from the anchors at positions 2, 3 and 4.

        For each of those anchors, an absolute URL containing ``jpg`` or
        ``png`` is used directly; otherwise an ``/imgres`` link is resolved
        against the Google base URL. When several anchors qualify, the last
        one wins. Missing anchors or anchors without ``href`` are skipped.

        Args:
            data: Sequence of anchor-like objects supporting ``['href']``.

        Returns:
            The thumbnail link, or ``"No directable url"`` when none is found.
        """
        # Local import: ``urljoin`` is not among the module-level imports.
        from urllib.parse import urljoin

        try:
            candidates = data[2:5]
        except (TypeError, KeyError):
            # Not a sliceable sequence at all.
            return GoogleNorm._NO_THUMBNAIL

        thumbnail = ""
        for anchor in candidates:
            try:
                href = anchor['href']
            except (KeyError, IndexError, TypeError):
                continue
            if not isinstance(href, str):
                continue

            match = GoogleNorm._URL_PATTERN.search(href)
            if match and re.search('jpg|png', match.group(1)):
                thumbnail = match.group(1)
            elif '/imgres' in href:
                # urljoin avoids the double slash a plain concatenation of
                # the base and a "/imgres..." path would produce.
                thumbnail = urljoin(GoogleNorm._GOOGLE_BASE, href)

        return thumbnail or GoogleNorm._NO_THUMBNAIL

    def __repr__(self):
        return f'<NormGoogle(title={repr(self.titles)}, urls={self.urls}, thumbnail={self.thumbnail})>'
|
||
|
||
class GoogleResponse:
    """Pair the original result elements with a normalised view of each."""

    def __init__(self, resp):
        """Store ``resp`` and normalise every element it contains.

        Args:
            resp: Iterable of elements, each exposing ``contents``.
        """
        # Untouched elements, exactly as handed in.
        self.origin: list = resp
        # One GoogleNorm per element, built from that element's child nodes.
        self.raw: list = [GoogleNorm(element.contents) for element in self.origin]

    def __repr__(self):
        total = len(self.origin)
        return f'<GoogleResponse(count{repr(total)})>'
|
||
|
||
class Google:
    """Client for Google's ``searchbyimage`` endpoint."""

    GOOGLEURL = 'https://www.google.com/searchbyimage'

    def __init__(self, **request_kwargs):
        """Prepare the default query parameters, headers and request options.

        Args:
            **request_kwargs: Extra keyword arguments forwarded unchanged to
                ``requests.get`` / ``requests.post`` on every search.
        """
        self.params = dict()
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
        }
        self.requests_kwargs = request_kwargs

    @staticmethod
    def _slice(res):
        """Parse the result page and wrap every element with class ``g``.

        Args:
            res: The HTML of the result page.

        Returns:
            A ``GoogleResponse`` built from the matching elements.
        """
        if isinstance(res, bytes):
            soup = BeautifulSoup(res, 'html.parser', from_encoding='utf-8')
        else:
            # For already-decoded text bs4 ignores ``from_encoding`` and only
            # emits a warning, so it is not passed in that case.
            soup = BeautifulSoup(res, 'html.parser')
        return GoogleResponse(soup.find_all(class_='g'))

    def search(self, url):
        """Run a reverse image search for a remote URL or a local file.

        Args:
            url: Either an ``http``/``https`` image URL or the path of a local
                image file to upload.

        Returns:
            A ``GoogleResponse`` when the server answers with status 200;
            otherwise the status code is logged and ``None`` is returned.
        """
        if url.startswith('http'):
            # Work on a copy so one search never leaks parameters into the
            # next one through ``self.params``.
            params = dict(self.params)
            # NOTE(review): requests percent-encodes ``params`` as well, so
            # this value ends up encoded twice - confirm the endpoint expects
            # that before changing it.
            params['image_url'] = quote(url, safe='')
            response = requests.get(
                self.GOOGLEURL, params=params, headers=self.header, **self.requests_kwargs)
        else:
            # The context manager closes the file once the upload finished.
            with open(url, 'rb') as image_file:
                multipart = {
                    'encoded_image': (url, image_file),
                    'image_content': '',
                }
                response = requests.post(
                    f"{self.GOOGLEURL}/upload", files=multipart, headers=self.header, **self.requests_kwargs)

        if response.status_code == 200:
            return self._slice(response.text)

        logger.error(response.status_code)
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,11 @@ | ||
beautifulsoup4==4.9.3 | ||
certifi==2020.12.5 | ||
chardet==3.0.4 | ||
colorama==0.4.4 | ||
idna==2.10 | ||
loguru==0.5.3 | ||
requests==2.23.0 | ||
loguru~=0.5.3 | ||
BeautifulSoup4~=4.9.3 | ||
urllib3~=1.25.11 | ||
requests-toolbelt==0.9.1 | ||
soupsieve==2.2 | ||
urllib3==1.25.11 | ||
win32-setctime==1.0.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
|
||
setuptools.setup( | ||
name="PicImageSearch", | ||
version="0.7.0", | ||
version="0.8.2", | ||
author="kitUIN", | ||
author_email="[email protected]", | ||
description="PicImageSearch APIs for Python 3.x 适用于 Python 3 以图搜源整合API", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from loguru import logger | ||
|
||
from PicImageSearch import Google | ||
|
||
google = Google()
res = google.search("https://media.discordapp.net/attachments/783138508038471701/813452582948306974/hl-18-1-900x1280.png?width=314&height=447")
# A local file path can be searched the same way:
# res = google.search(r'C:/kitUIN/img/tinted-good.jpg')

if res is None:
    # search() returns None (after logging the status code) on a non-200 reply.
    logger.error("Google search did not return a result page")
else:
    logger.info(res.origin)  # Original data
    logger.info(res.raw)  # Normalised data

    # In the sample run the matching images start at index 2, so the demo
    # reads that entry - but only when it exists and is not empty.
    if len(res.raw) > 2 and res.raw[2].titles:
        match = res.raw[2]
        logger.info(match)  # <NormGoogle(title=["The Strongest Dull Prince's Secret Battle for the Throne ..."], urls=['https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/'], thumbnail=['No directable url'])>
        logger.info(match.thumbnail[0])  # No directable url
        logger.info(match.titles[0])  # The Strongest Dull Prince's Secret Battle for the Throne ...
        logger.info(match.urls[0])  # https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/
    else:
        logger.warning("Fewer than three usable results were returned")