-
Notifications
You must be signed in to change notification settings - Fork 7
/
app.py
102 lines (81 loc) · 3.77 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# coding: utf8
# importing modules
from flask import Flask, render_template, request, jsonify
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from apiclient.http import MediaIoBaseDownload, MediaFileUpload
import requests
import datetime
import io
import os
from flask_cors import CORS
# Create the Flask application and enable CORS on it with flask_cors defaults.
app = Flask(__name__)
CORS(app)
# Disable ASCII-only escaping so non-ASCII OCR text is emitted as-is in JSON
# responses.
app.config['JSON_AS_ASCII'] = False
# Flask 2.3 removed the JSON_AS_ASCII config key; from 2.2 onwards the setting
# lives on the app's JSON provider, so mirror it there when the provider exists.
if hasattr(app, 'json'):
    app.json.ensure_ascii = False
# Register index() as the handler for the site root ('/').
@app.route('/')
def index():
    """Serve the landing page rendered from the ``home.html`` template."""
    page = render_template('home.html')
    return page
def _image_extension(content_type):
    """Return a filename-safe extension for an image ``Content-Type`` value.

    Header parameters such as ``; charset=...`` are dropped, the ``image/``
    prefix is removed, and every character other than letters, digits,
    ``.``, ``+`` and ``-`` is replaced with ``_`` so the result can never
    introduce a path separator. An empty header yields an empty string.
    """
    media_type = content_type.split(';', 1)[0].strip().lower()
    subtype = media_type.replace('image/', '')
    return ''.join(ch if ch.isalnum() or ch in '.+-' else '_' for ch in subtype)


# Register getOCR() as the handler for GET requests to /getOCR.
@app.route('/getOCR', methods=['GET'])
def getOCR():
    """Run OCR on a remote image through Google Drive and return the text.

    The image is downloaded into ``ocr/``, uploaded to Google Drive as a
    Google Docs document, exported back as plain text into ``ocr/`` and the
    resulting text is returned. The local image copy is always removed; the
    exported ``.txt`` file is left in ``ocr/``.

    Query parameters:
        imageurl: URL of the image to fetch (required).
        langcode: language code; read but not used yet.
        api: when it contains ``"True"`` the response is JSON instead of HTML.

    Returns:
        JSON ``{"text": ...}`` for API requests, the rendered ``getOCR.html``
        page otherwise, or JSON ``{"error": ...}`` when the URL is missing,
        the download fails, the response is not an image, or any step raises.
    """
    # No request.method check here: the route only accepts GET, and Flask
    # also dispatches HEAD to GET views, for which a method check would make
    # the view return None (an invalid response).

    # langcode is not used yet; it is kept because it is planned for future use.
    langcode = request.args.get('langcode', '')
    imageUrl = request.args.get('imageurl', '')
    isAPI = request.args.get('api', '')

    if not imageUrl:
        return jsonify({"error": "No image URL provided"})

    try:
        # Download the image file.
        # NOTE(review): imageUrl is caller-supplied and fetched server-side;
        # consider restricting the allowed hosts.
        r = requests.get(
            imageUrl,
            allow_redirects=True,
            headers={'User-Agent': 'wikimedia-indic-ocr/1.0'},
            timeout=10,
        )
        if not r.ok:
            return jsonify({"error": f"Failed to fetch image from {imageUrl}"})

        # Reject responses that declare a non-image type. A missing header is
        # still accepted and simply produces an empty file extension.
        content_type = r.headers.get('content-type', '')
        media_type = content_type.split(';', 1)[0].strip().lower()
        if media_type and not media_type.startswith('image/'):
            return jsonify({"error": f"URL did not return an image: {imageUrl}"})

        currentTime = str(datetime.datetime.now()).replace(':', '_').replace(' ', '_')
        fileName = currentTime + "." + _image_extension(content_type)

        # Save the image file, creating the working directory on first use.
        os.makedirs("ocr", exist_ok=True)
        file_path = "ocr/" + fileName
        with open(file_path, 'wb') as f:
            f.write(r.content)

        try:
            # Google Drive API setup.
            # NOTE(review): tools.run_flow runs inside the request whenever
            # token.json is missing or invalid - presumably an interactive
            # authorization flow; confirm this is acceptable on a server.
            SCOPES = 'https://www.googleapis.com/auth/drive.file'
            store = file.Storage('token.json')
            creds = store.get()
            if not creds or creds.invalid:
                flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
                creds = tools.run_flow(flow, store)
            service = build('drive', 'v3', http=creds.authorize(Http()))

            # Upload the file to Google Drive as a Google Docs document.
            folder_id = '1-G2OEJI5RonEobDW6AfaeC0FsDfUVJPx'
            mime = 'application/vnd.google-apps.document'
            file_metadata = {'name': fileName, 'mimeType': mime, 'parents': [folder_id]}
            media = MediaFileUpload(file_path, mimetype=mime)
            Imgfile = service.files().create(
                body=file_metadata, media_body=media, fields='id'
            ).execute()
        finally:
            # Delete the local image even when credentials or upload fail,
            # so failed requests do not leave files behind.
            os.remove(file_path)

        # Download the document in plain-text format from Google Drive.
        getTxt = service.files().export_media(fileId=Imgfile.get('id'), mimeType='text/plain')
        txt_file_path = "ocr/" + currentTime + ".txt"
        with open(txt_file_path, 'wb') as fh:
            downloader = MediaIoBaseDownload(fh, getTxt)
            done = False
            while not done:
                _, done = downloader.next_chunk()

        # Read the OCR text; utf-8-sig drops a leading byte-order mark if the
        # export contains one and otherwise behaves like utf-8.
        with open(txt_file_path, mode="r", encoding="utf-8-sig") as txt_file:
            OCRtext = txt_file.read()

        # Remove the ________________ separator that is present in the
        # output of the Google OCR text.
        OCRtext = OCRtext.replace('________________\n\n', '')

        # API requests get the text as JSON.
        if "True" in isAPI:
            return jsonify({"text": OCRtext})

        # Everything else gets the HTML page with the OCR data.
        return render_template('getOCR.html', imageUrl=imageUrl, OCRtext=OCRtext)

    except Exception as e:
        # Top-level boundary of the view: record the traceback, then report
        # the failure to the caller in the same JSON shape as before.
        app.logger.exception("OCR request failed for %s", imageUrl)
        return jsonify({"error": str(e)})
# Start the Flask server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    app.run()