Glinet

Tedcharlesbrown · Sep 25, 2023 · 87f2a6d · 87f2a6d
1 parent be96ee2
commit 87f2a6d
Show file tree

Hide file tree

Showing 2 changed files with 310 additions and 0 deletions.
diff --git a/GliNet/GLiNet_Archive.py b/GliNet/GLiNet_Archive.py
diff --git a/GliNet/GLiNet_Download.py b/GliNet/GLiNet_Download.py
@@ -0,0 +1,310 @@
+from dataclasses import dataclass
+@dataclass
+class APPLICATION:
+	display: str
+	name: str
+	link: str
+
+import json
+import asyncio
+from pyppeteer import launch
+from bs4 import BeautifulSoup
+import aiohttp
+
+import webbrowser
+from urllib.parse import urljoin
+
+from fuzzywuzzy import fuzz
+from tqdm import tqdm
+from tqdm.asyncio import tqdm as async_tqdm
+
+
+from ftplib import FTP, error_perm, error_temp, error_proto
+import os
+import datetime
+
+flags = [
+	"initialize",		#0
+	"script-run",		#1
+	"",					#2
+	"",					#3
+	"",					#4
+	"free-space-error",	#5
+	"ftp-attempt",		#6
+	"ftp-error",		#7
+	"ftp-success",   	#8
+	"",					#9
+	"ftp-quit",			#10
+	"file-notfound",	#11
+	"file-sizediff",	#12
+	"file-found",		#13
+	"",					#14
+	"file-limit",		#15
+	"download-success",	#16
+	"",					#17
+	"",					#18
+	"",					#19
+	"",					#20
+	"get-download",		#21
+	"got-download",		#22
+	"start-download",	#23
+	"",					#24
+	"",					#25
+	"parse-fail",		#26
+	"download-fail",	#27
+	]
+
+error = "UNCAUGHT ERROR"
+log_file = "archive_log.csv"
+script_path = os.path.dirname(os.path.abspath(__file__))
+parent_path = os.path.dirname(script_path)
+tcbatch_path = os.path.join(parent_path,"TCBatch")
+archive_flag = os.path.join(tcbatch_path,"__archive__")
+
+application_list = "application_list.json"
+
+async def check_already_downloaded(filename: str) -> bool:
+	"""RETURNS TRUE IF FILE ALREADY DOWNLOADED"""
+	downloaded_files = os.listdir(tcbatch_path)
+	for file in downloaded_files:
+		if filename.lower() == file.lower():
+			return True
+
+	return False
+
+def check_file_size(filename: str) -> bool:
+	"""RETURNS TRUE IF FILE SIZES MATCH"""
+
+
+
+async def get_page_html(url: str):
+	"""Get page HTML, including content loaded via JavaScript."""
+	browser = await launch()
+	try:
+		page = await browser.newPage()
+		await page.goto(url)
+		html = await page.content()
+	finally:
+		await browser.close()
+	return html
+
+def parse_html_for_link(url: str, html: str):
+	"""Parse HTML to find download links."""
+	soup = BeautifulSoup(html, 'html.parser')
+	for link in soup.find_all('a', href=True):
+		# Create absolute URL if necessary
+		absolute_link = urljoin(url, link['href'])
+		if ".exe" in absolute_link or ".msi" in absolute_link:
+			return absolute_link
+	return None
+
+async def download_from_web(app_name: str, url: str):
+	"""Download a file from a URL."""
+	try:
+		async with aiohttp.ClientSession() as session:
+			async with session.get(url) as response:
+				filename = url.split("/")[-1]
+				if not await check_already_downloaded(filename):
+					console_log(flags[23],f"DOWNLOADING: {app_name.upper()}")
+					with open(os.path.join(tcbatch_path,filename), 'wb') as f:
+						async for data in response.content.iter_any():
+							f.write(data)
+					console_log(flags[16],f"FINISHED DOWNLOAD: {app_name}")
+				else:
+					console_log(flags[13],f"{app_name.upper()} ALREADY IN GLINET")
+
+	except Exception as e:
+		console_log(flags[27],f"COULD NOT DOWNLOAD: {app_name}")
+
+
+async def find_file_from_website(app: APPLICATION):
+	url = app.link
+	try:
+		html = await get_page_html(url)
+		download_link = parse_html_for_link(url, html)
+		if download_link is not None:
+			console_log(flags[22],f"GOT LINK FOR: {app.display}")
+			await download_from_web(app.display, download_link)
+			return True
+		else:
+			console_log(flags[26], f"NO LINK FOR: {app.display}")
+			return None
+
+	except Exception as e:
+		console_log(flags[26], f"COULD NOT PARSE: {app.display}")
+		return None
+
+# async def get_download(download_list: list):
+# 	TIMEOUT_DURATION = 5
+# 	download_tasks = []
+# 	archived_apps = []
+
+# 	file_limit = 0
+
+# 	for i, app, in enumerate(download_list):
+# 		if file_limit < 1000:
+# 			if app.link == "Archive":
+# 				archived_apps.append(app)
+# 			else:
+# 				console_log(flags[21],f"GETTING FROM ONLINE: {app.display}")
+# 				task = asyncio.wait_for(find_file_from_website(app), TIMEOUT_DURATION)
+# 				download_tasks.append(task)
+# 			file_limit += 1
+# 		else:
+# 			console_log(flags[15],"DEBUG FILE LIMIT REACHED - INTERNET")
+# 			break
+
+# 	# Execute all tasks concurrently
+# 	results = []
+# 	for task in download_tasks:
+# 		try:
+# 			result = await task
+# 			results.append(result)
+# 		except asyncio.TimeoutError:
+# 			# If there's a timeout, append None to results (or any sentinel value you prefer)
+# 			results.append(None)
+
+# 	for result, app in zip(results, download_list):
+# 		if result is None:
+# 			archived_apps.append(app)
+
+# 	return archived_apps
+
+async def get_download(download_list: list):
+    TIMEOUT_DURATION = 5
+    download_tasks = []
+    archived_apps = []
+
+    file_limit = 0
+
+    for i, app in enumerate(download_list):
+        if file_limit < 10:
+            if app.link == "Archive":
+                archived_apps.append(app)
+            else:
+                # console_log(flags[21],f"GETTING FROM ONLINE: {app.display}")
+                task = asyncio.wait_for(find_file_from_website(app), TIMEOUT_DURATION)
+                download_tasks.append(task)
+            file_limit += 1
+        else:
+            console_log(flags[15],"DEBUG FILE LIMIT REACHED - INTERNET")
+            break
+
+    # Execute all tasks concurrently
+    results = []
+    for task, app in zip(download_tasks, download_list):
+        try:
+            result = await task
+            results.append(result)
+            if result is None:  # If the download failed (based on your sentinel value)
+                archived_apps.append(app)
+        except asyncio.TimeoutError:
+            # If there's a timeout, append None to results (or any sentinel value you prefer)
+            results.append(None)
+            archived_apps.append(app)  # Add app to archived list if there's a timeout
+
+    return archived_apps
+
+# ---------------------------------------------------------------------------- #
+#                                    ARCHIVE                                   #
+# ---------------------------------------------------------------------------- #
+
+def download_from_archive(ftp, file):
+	"""download from FTP server"""
+	try:
+		with open(f"{tcbatch_path}/{file}", 'wb') as local_file:
+			ftp.retrbinary('RETR ' + file, local_file.write)
+		console_log(flags[16],f"DOWNLOADED {file}")
+	except (error_perm, error_temp, error_proto, EOFError, IOError, FileNotFoundError, PermissionError, MemoryError, TypeError, Exception) as e:
+		if isinstance(e, Exception):  # All exceptions derive from the base Exception class.
+			console_log(flags[7], str(e).upper())
+		else:
+			console_log(error, "UNCAUGHT ERROR")
+
+
+def search_archive(init: bool):
+	"""connect to FTP server and compare files"""
+	console_log(flags[6],"CONNECTING TO FTP")
+	try:
+		# ftp = FTP("tedcharlesbrown.synology.me")
+		ftp = FTP("192.168.1.100")
+		ftp.login("_FTP_", "tedcharlesbrown_ftp")
+		console_log(flags[8],"FTP CONNECTION SUCCESS")
+
+	except (error_perm, error_temp, error_proto, EOFError, IOError, FileNotFoundError, PermissionError, MemoryError, TypeError, Exception) as e:
+		if isinstance(e, Exception):  # All exceptions derive from the base Exception class.
+			console_log(flags[7], str(e).upper())
+		else:
+			console_log(error, "UNCAUGHT ERROR")
+
+	files_in_archive = ftp.nlst()
+	files_downloaded = os.listdir(tcbatch_path)
+
+	file_limit = 0
+
+	if init:
+		if application_list in files_in_archive:
+			download_from_archive(ftp,application_list)
+	else:
+		for file in files_in_archive:
+			if file != "__hidden__" and file != application_list:
+				if file_limit < 10:
+					if not asyncio.run(check_already_downloaded(file)):
+						console_log(flags[11],f"{file.upper()} NOT IN GLINET")
+						download_from_archive(ftp,file)
+					elif ftp.size(file) != os.path.getsize(os.path.join(tcbatch_path,file)):
+						console_log(flags[12],f"{file.upper()} SIZE MISMATCH")
+						download_from_archive(ftp,file)
+					else:
+						console_log(flags[13],f"{file.upper()} IN GLINET, SKIPPING")
+					file_limit += 1
+				else:
+					console_log(flags[15],"DEBUG FILE LIMIT REACHED- ARCHIVE")
+					break
+
+	ftp.quit()
+	console_log(flags[10],f"FTP CLOSED")
+
+# ---------------------------------------------------------------------------- #
+#                                  CONSOLE LOG                                 #
+# ---------------------------------------------------------------------------- #
+
+def console_log(flag: str, log: str):
+	"""prints to the console and appends the log file"""
+	date = datetime.datetime.now()
+	print(date, flag, log)
+	with open(os.path.join(script_path,log_file),'a') as file:
+		file.write(f"\n{date},{flag},{log}")
+
+
+if not os.path.exists(log_file):
+	with open(os.path.join(script_path,log_file),'w') as file:
+		file.write(f"[TIME],[FLAG],[LOG]\n{datetime.datetime.now()},{flags[0]},LOG CREATED")
+
+if not os.path.exists(tcbatch_path):
+	os.mkdir(tcbatch_path)
+
+if not os.path.exists(archive_flag):
+	with open(archive_flag,'w') as file:
+		file.write("")
+
+
+console_log(flags[1],"-----RUNNING SCRIPT-----")
+
+# GET APPLICATION DOWNLOAD LIST
+search_archive(True)
+
+#PARSE LIST FROM APPLICATION DOWNLOAD LIST
+download_list = []
+with open(os.path.join(tcbatch_path,application_list), "r") as json_file:
+	json_data = json.load(json_file)
+
+for item in json_data:
+	app = APPLICATION(item["display"], item["name"], item["link"])
+	download_list.append(app)
+
+apps = asyncio.run(get_download(download_list))
+search_archive(False)
+
+
+