
Commit

Merge pull request #2 from lleans/main
Somehow I was able to add Google API
kitUIN authored Feb 27, 2021
2 parents d848895 + 123feea commit 7ccfbe7
Showing 6 changed files with 159 additions and 5 deletions.
1 change: 1 addition & 0 deletions PicImageSearch/__init__.py
@@ -2,6 +2,7 @@
from .tracemoe import TraceMoe
from .ascii2d import Ascii2D
from .iqdb import Iqdb
from .google import Google
__author__ = 'kitUIN'
__license__ = 'Apache-2.0 License'
__maintainer__ = 'kitUIN'
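This re-export is what lets the new class be imported from the package root, as test/test5.py below does. A minimal sketch, assuming the package is installed:

from PicImageSearch import Google  # enabled by the import added above

google = Google()  # actual searches are exercised in test/test5.py below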
114 changes: 114 additions & 0 deletions PicImageSearch/google.py
@@ -0,0 +1,114 @@
import requests
from bs4 import BeautifulSoup
from loguru import logger
from requests_toolbelt import MultipartEncoder
from urllib.parse import quote
import re


class GoogleNorm:

def __init__(self, data):
self.thumbnail: list = list()
self.titles: list = list()
self.urls: list = list()
self._arrange(data)

def _arrange(self, data):
get_data = self._getdata(data)
self.titles = get_data['titles']
self.urls = get_data['urls']
self.thumbnail = get_data['thumbnail']

def _getdata(self, datas):

data = {
'thumbnail': [],
'titles': [],
'urls': [],
}

for x in datas:
try:
origin = x.find_all('span')
data['titles'].append(origin[0].string)
url = x.find_all('a')
data['urls'].append(url[0]['href'])
img = self._gethumbnail(url)
data['thumbnail'].append(img)
            except Exception:  # skip result blocks missing the expected tags
pass

return data

@staticmethod
def _gethumbnail(data):
GOOGLEURL = "https://www.google.com/"
regex = re.compile(
r"((http(s)?(\:\/\/))+(www\.)?([\w\-\.\/])*(\.[a-zA-Z]{2,3}\/?))[^\s\b\n|]*[^.,;:\?\!\@\^\$ -]")

thumbnail = ""

try:
for a in range(2, 5):
if re.findall('jpg|png', regex.search(data[a]['href']).group(1)):
thumbnail = regex.search(data[a]['href']).group(1)
elif re.findall('/imgres', data[a]['href']):
thumbnail = f"{GOOGLEURL}{data[a]['href']}"
        except Exception:  # fall back when the expected anchors are missing or unparsable
thumbnail = "No directable url"

return thumbnail

def __repr__(self):
return f'<NormGoogle(title={repr(self.titles)}, urls={self.urls}, thumbnail={self.thumbnail})>'


class GoogleResponse:

def __init__(self, resp):
self.origin: list = resp
self.raw: list = list()

for ele in self.origin:
detail = ele.contents
self.raw.append(GoogleNorm(detail))

def __repr__(self):
        return f'<GoogleResponse(count={len(self.origin)})>'


class Google:
GOOGLEURL = 'https://www.google.com/searchbyimage'

def __init__(self, **request_kwargs):
        self.params = dict()
self.header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
}
self.requests_kwargs = request_kwargs

@staticmethod
def _slice(res):
soup = BeautifulSoup(res, 'html.parser', from_encoding='utf-8')
resp = soup.find_all(class_='g')
return GoogleResponse(resp)

def search(self, url):
params = self.params
        if url[:4] == 'http':  # remote image: send the encoded URL as a query parameter
            urlimage_encd = quote(url, safe='')
            params['image_url'] = urlimage_encd
            response = requests.get(
                self.GOOGLEURL, params=params, headers=self.header, **self.requests_kwargs)
        else:  # local file: upload it as multipart form data
            with open(url, 'rb') as image:
                multipart = {'encoded_image': (url, image), 'image_content': ''}
                response = requests.post(
                    f"{self.GOOGLEURL}/upload", files=multipart, headers=self.header, **self.requests_kwargs)
if response.status_code == 200:
return self._slice(response.text)
else:
logger.error(response.status_code)
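For orientation, a minimal sketch (not part of the commit) of how the parsed results can be consumed. It assumes search() returned a GoogleResponse as above and, per the note in test/test5.py, that matching images start at index 2:

from loguru import logger
from PicImageSearch import Google

google = Google()
res = google.search('https://example.com/sample.png')  # hypothetical image URL
if res is not None:  # search() returns None on a non-200 response
    for norm in res.raw[2:]:  # GoogleNorm objects; earlier entries are not image matches
        if norm.titles and norm.urls:
            logger.info(f'{norm.titles[0]} -> {norm.urls[0]}')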
20 changes: 19 additions & 1 deletion PicImageSearch/iqdb.py
@@ -86,4 +86,22 @@ def search(self, url):
logger.error(e)

def search_3d(self, url):
pass
        try:
            if url[:4] == 'http':  # network URL
                datas = {
                    "url": url
                }
                res = requests.post(self.url_3d, data=datas, **self.requests_kwargs)
            else:  # local file
                with open(url, 'rb') as image:
                    m = MultipartEncoder(
                        fields={
                            'file': ('filename', image, "type=multipart/form-data")
                        }
                    )
                    headers = {'Content-Type': m.content_type}
                    urllib3.disable_warnings()
                    res = requests.post(self.url_3d, data=m, headers=headers, **self.requests_kwargs)
            if res.status_code == 200:
                return IqdbResponse(res.content)
        except Exception as e:
            logger.error(e)
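Similarly, a minimal sketch (not part of the commit) of calling the new search_3d method; it assumes Iqdb() takes no constructor arguments and that url_3d is defined elsewhere in iqdb.py, neither of which is shown in this hunk:

from loguru import logger
from PicImageSearch import Iqdb

iqdb = Iqdb()  # assumed no-argument constructor, not shown in this diff
res = iqdb.search_3d('https://example.com/photo.jpg')  # network-URL branch
if res is not None:  # errors and non-200 responses yield None
    logger.info(res)  # an IqdbResponse; its attributes are defined elsewhere in iqdb.py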
13 changes: 10 additions & 3 deletions requirements.txt
@@ -1,4 +1,11 @@
-beautifulsoup4==4.9.3
+certifi==2020.12.5
+chardet==3.0.4
+colorama==0.4.4
+idna==2.10
-loguru==0.5.3
 requests==2.23.0
+loguru~=0.5.3
+BeautifulSoup4~=4.9.3
+urllib3~=1.25.11
+requests-toolbelt==0.9.1
+soupsieve==2.2
-urllib3==1.25.11
+win32-setctime==1.0.3
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@

setuptools.setup(
name="PicImageSearch",
version="0.7.0",
version="0.8.2",
author="kitUIN",
author_email="[email protected]",
description="PicImageSearch APIs for Python 3.x 适用于 Python 3 以图搜源整合API",
14 changes: 14 additions & 0 deletions test/test5.py
@@ -0,0 +1,14 @@
from loguru import logger

from PicImageSearch import Google

google = Google()
res = google.search("https://media.discordapp.net/attachments/783138508038471701/813452582948306974/hl-18-1-900x1280.png?width=314&height=447")
#res = google.search(r'C:/kitUIN/img/tinted-good.jpg') # Search Image URL or path
logger.info(res.origin) # Original Data
logger.info(res.raw) # Raw Data
# Matching images start at index 2, so read results from there
logger.info(res.raw[2]) # <NormGoogle(title=["The Strongest Dull Prince's Secret Battle for the Throne ..."], urls=['https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/'], thumbnail=['No directable url'])>
logger.info(res.raw[2].thumbnail[0]) # No directable url
logger.info(res.raw[2].titles[0]) # The Strongest Dull Prince's Secret Battle for the Throne ...
logger.info(res.raw[2].urls[0]) # https://kiryuu.co/the-strongest-dull-princes-secret-battle-for-the-throne-chapter-3-bahasa-indonesia/
