Merge pull request #1 from DuckingSimplify/master
Refactoring, Adding CLI, Additional Options, etc.
surfer190 authored May 10, 2023
2 parents 810678a + 23e4b57 commit 66e17fa
Showing 1 changed file with 147 additions and 85 deletions.
232 changes: 147 additions & 85 deletions download_releases.py
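In short, the old top-level script is wrapped into functions and driven by an argparse CLI: an output directory, optional selection of specific releases, and per-field character translation for titles, artists and album names. As a rough sketch pieced together from the cli(), translator() and download_lofi() code added below (the flag values here are made up, not part of the commit), an invocation such as "python download_releases.py -o downloads -d 0 2 --title-remove '!.'" reduces to:

    tg = TranslatorGroup(
        title=str.maketrans("", "", "!."),  # strip "!" and "." from track titles
        artist=str.maketrans("", "", ""),   # artist names left untouched
        album=str.maketrans("", "", ""),    # album names left untouched
    )
    download_lofi(output_dir="downloads", download=True, release_numbers=[0, 2], translators=tg)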
@@ -7,52 +7,78 @@
import wget
import eyed3
from colorama import Fore, Style
+import argparse
+from collections import namedtuple, OrderedDict

-# get html file of releases page
-http = urllib3.PoolManager()
-resp = http.request("GET", "https://lofigirl.com/blogs/releases")
-soup = BeautifulSoup(resp.data, "html.parser")
-
-# prepare variables for crunching releases html
-releases_link_prefix = "https://lofigirl.com"
-releases_links = []
-releases_names = []
-releases_artists = []
-
-# crunch releases html, look for releases names, artists and URLs
-for link in soup.find_all("div", class_="Cv_release_mini_wrap_inner"):
-    releases_links.append(releases_link_prefix + link.find("a").get("href"))
-    releases_names.append(link.find("h2").string)
-    releases_artists.append(link.find("i").string)
-
-# remove duplicates in links and names, then reverse all 3 arrays so the newest ones are at the bottom of the command line
-releases_links = list(dict.fromkeys(releases_links))
-releases_names = list(dict.fromkeys(releases_names))
-releases_links.reverse()
-releases_names.reverse()
-releases_artists.reverse()
-
-# print all releases for the user to choose, get user input
-for i in range(0, len(releases_links)):
-    print(
-        Fore.RED
-        + str(i + 1)
-        + ". "
-        + Fore.BLUE
-        + releases_names[i]
-        + Style.RESET_ALL
-        + " by "
-        + Fore.GREEN
-        + releases_artists[i]
-    )
-print(Style.RESET_ALL)
+# URL Configuration
+RELEASE_URL = "https://lofigirl.com/blogs/releases"
+RELEASE_LINK_PREFIX = "https://lofigirl.com"
+
+# Release & Sound Definition
+Release = namedtuple("Release", ("name","link","artists"))
+SoundFile = namedtuple("SoundFile", ("title","link","artists"))
+TranslatorGroup = namedtuple("TranslatorGroup", ("title","artist","album"))
+
+def download_lofi(output_dir, download, release_numbers, translators):
+    # HTTP Manager
+    http = urllib3.PoolManager()
+
+    releases = manage_info(http)
+
+    if download:
+        if release_numbers:
+            selected_releases = []
+            for release_num in release_numbers:
+                selected_releases.append(releases[release_num])
+            releases = selected_releases
+
+        download_releases(http, releases, output_dir, translators=translators)
+    else:
+        print("Skipping download. Specify -d with optional release numbers to download")
+
+def manage_info(http):
+
+    # get html file of releases page
+    resp = http.request("GET", RELEASE_URL)
+    soup = BeautifulSoup(resp.data, "html.parser")
+
+    # prepare variables for crunching releases html
+    releases = []
+
+    # crunch releases html, look for releases names, artists and URLs
+    for link in soup.find_all("div", class_="Cv_release_mini_wrap_inner"):
+        release_link = RELEASE_LINK_PREFIX + link.find("a").get("href")
+        name = link.find("h2").string
+        artists = link.find("i").string
+        releases.append(Release(name, release_link, artists))
+
-num_releases = len(releases_links)
+    # remove duplicates in links and names, then reverse all 3 arrays so the newest ones are at the bottom of the command line
+    releases = list(OrderedDict(((release.link, release) for release in releases)).values())
+    releases = list(OrderedDict(((release.name, release) for release in releases)).values())
+    releases.reverse()
+
+    # print all releases for the user to choose, get user input
+    for i, release in enumerate(releases):
+        print(
+            Fore.RED
+            + str(i)
+            + ". "
+            + Fore.BLUE
+            + release.name
+            + Style.RESET_ALL
+            + " by "
+            + Fore.GREEN
+            + release.artists
+        )
+    print(Style.RESET_ALL)
+
+    return releases

+def download_release(http, release, output_dir, translators):
+    print(f"Fetching Release: {release.name} by {release.artists}")

-for selected_release in range(1, num_releases + 1):
-    print("#", selected_release)
    # get html file of user selected release
-    resp = http.request("GET", releases_links[int(selected_release) - 1])
+    resp = http.request("GET", release.link)
    soup = BeautifulSoup(resp.data, "html.parser")

    # crunch release html for its name and link to its image
@@ -66,115 +92,110 @@
    )

    # crunch release html for links, titles and artists of individual songs
-    sound_file_links = []
-    sound_file_title = []
-    sound_file_artist = []
+    sound_files = []
    for link in soup.find_all("div", class_="cv_custom_album_play_contents_inner_part"):
        try:
-            sound_file_links.append(
-                link.find("div", class_="cv_custom_download_icon_part").get(
+            sound_file_link = link.find(
+                "div", class_="cv_custom_download_icon_part").get(
                "data-audio-src"
            )
-            )
        except AttributeError as error:
            print(error)
            print("No data source found...skipping")
            continue
-        sound_file_title.append(
-            link.find(

+        sound_file_title=link.find(
            "div", class_="cv_custom_custom_content_description"
-            ).h4.string.strip()[3:]
-        )
+        ).h4.string.strip()[3:].strip()

        try:
-            sound_file_artist.append(
-                link.find(
+            sound_file_artists = link.find(
                "div", class_="cv_custom_custom_content_description"
            ).p.string.strip()
-            )
        except AttributeError as error:
            print(error)
            print("Using h4 tag")
-            sound_file_artist.append(
-                link.find(
+            sound_file_artists = link.find(
                "div", class_="cv_custom_custom_content_description"
            ).h4.string.strip()
-            )

+        sound_files.append(SoundFile(sound_file_title, sound_file_link, sound_file_artists))

    # show user the links to the credit templates and release, also generate YouTube credits
    print(Fore.RED + "Here is the usage policy and credit templates:" + Style.RESET_ALL)
    print("https://lofigirl.com/pages/use-the-music")
    print(Fore.RED + "Here's the link to the release:" + Style.RESET_ALL)
-    print(releases_links[int(selected_release) - 1])
+    print(release.link)
    print(
        Fore.RED
        + "And here's the credit template for youtube for an entire album. Note that watch and listen links only show search queries on their respective platforms:"
        + Style.RESET_ALL
    )
-    for i in range(0, len(sound_file_links)):
-        print("- " + sound_file_artist[i] + " - " + sound_file_title[i])
+    for sound_file in sound_files:
+        print("- " + sound_file.artists + " - " + sound_file.title)
    print("- Provided by Lofi Girl")
    print(
        "- Watch: https://www.youtube.com/c/LofiGirl/search?query="
        + album_name.replace(" ", "")
    )
    print("- Listen: https://open.spotify.com/search/" + album_name.replace(" ", ""))

-    album_name_stripped = album_name.replace(" ", "_").replace(".", "")
+    album_name_stripped = album_name.translate(translators.album)

    # make a folder with the name of the album and download the cover into it
-    album_name = f"downloads/{album_name_stripped}"
+    album_path = os.path.join(output_dir, album_name_stripped)
    try:
-        os.mkdir(album_name)
+        os.mkdir(album_path)
    except FileExistsError as error:
        print(error)
        print(
            "Folder exists - moving on...delete the folder and rerun for a fresh download"
        )
-        continue
+        return

    if not image_link:
        breakpoint()
-    wget.download(image_link, out=os.path.join(album_name, "cover.png"))
+    wget.download(image_link, out=os.path.join(album_path, "cover.png"))

    # create credits.txt file with the same content like what is printed into the console above^
-    with open(album_name + "/credits.txt", "w") as f:
-        for i in range(0, len(sound_file_links)):
-            f.write("- " + sound_file_artist[i] + " - " + sound_file_title[i] + "\n")
+    with open(album_path + "/credits.txt", "w") as f:
+        for sound_file in sound_files:
+            f.write("- " + sound_file.artists + " - " + sound_file.title + "\n")
        f.close()

    # download all songs 1 by 1 into the new folder, access its metadata and fill album, artist, title and track num tags. Also create a trivial playlist file
-    f = open(album_name + "/playlist.m3u", "w")
-    for i in range(0, len(sound_file_links)):
+    f = open(album_path + "/playlist.m3u", "w")
+    for i, sound_file in enumerate(sound_files):
        artist = (
-            sound_file_artist[i]
-            .replace(".", "")
-            .replace("!", "")
-            .replace(" ", "_")
+            sound_file.artists
+            .translate(translators.artist)
            .replace("\u2019", "'")
            .replace("\u012b", "")
        )
        title = (
-            sound_file_title[i]
-            .replace(".", "")
-            .replace("!", "")
-            .replace(" ", "_")
+            sound_file.title
+            .translate(translators.title)
            .replace("\u2019", "'")
            .replace("\u012b", "")
        )

-        filename = f"{artist}-{title}.mp3"
-        if not sound_file_links[i]:
+        file_basename = f"{artist}-{title}.mp3"
+        filename = os.path.join(album_path, file_basename)

+        if not sound_file.link:
            breakpoint()
-        wget.download(sound_file_links[i], out=os.path.join(album_name, filename))
-        audiofile = eyed3.load(os.path.join(album_name, filename))

+        wget.download(sound_file.link, out=filename)
+        audiofile = eyed3.load(filename)
        audiofile.tag.album = album_name.replace("\u2019", "'").replace("\u012b", "")
        audiofile.tag.artist = (
-            sound_file_artist[i].replace("\u2019", "'").replace("\u012b", "")
+            sound_file.artists.replace("\u2019", "'").replace("\u012b", "")
        )
        audiofile.tag.title = (
-            sound_file_title[i].replace("\u2019", "'").replace("\u012b", "")
+            sound_file.title.replace("\u2019", "'").replace("\u012b", "")
        )
        audiofile.tag.track_num = i + 1

        try:
            audiofile.tag.save()
        except UnicodeEncodeError as error:
@@ -185,3 +206,44 @@

print()
print(Fore.RED + "all done" + Style.RESET_ALL)

+def download_releases(http, releases, output_dir, translators):
+    print(f"Downloading {len(releases)} releases")
+    for selected_release in releases:
+        download_release(http, selected_release, output_dir, translators=translators)

+def cli():
+    def is_dir(path):
+        if os.path.isdir(path):
+            return path
+        else:
+            raise NotADirectoryError(f"Supplied path is not a directory: {path}")

+    parser = argparse.ArgumentParser(prog=__file__, description="Downloader for lofigirl.com")
+    parser.add_argument("-o","--output", help="Output folder", type=is_dir, default="downloads", required=True)
+    parser.add_argument("-d","--download", help="Specify releases to be downloaded. Leave blank for all", type=int, nargs="*")
+    parser.add_argument("--title-remove", help="Remove all specified characters within the title", type=str, default="")
+    parser.add_argument("--title-replace", help="Specific characters within the title to be replaced with --title-replace-with", type=str, default="")
+    parser.add_argument("--title-replace-with", help="Specific characters within the title to replace occurrences of --title-replace", type=str, default="")
+    parser.add_argument("--artist-remove", help="Remove all specified characters within the artist", type=str, default="")
+    parser.add_argument("--artist-replace", help="Specific characters within the artist to be replaced with --artist-replace-with", type=str, default="")
+    parser.add_argument("--artist-replace-with", help="Specific characters within the artist to replace occurrences of --artist-replace", type=str, default="")
+    parser.add_argument("--album-remove", help="Remove all specified characters within the album", type=str, default="")
+    parser.add_argument("--album-replace", help="Specific characters within the album to be replaced with --album-replace-with", type=str, default="")
+    parser.add_argument("--album-replace-with", help="Specific characters within the album to replace occurrences of --album-replace", type=str, default="")

+    args = parser.parse_args()
+    return args

+def translator(replace_characters, replace_with_characters, remove_characters):
+    return str.maketrans(replace_characters, replace_with_characters, remove_characters)

+if __name__ == "__main__":
+    args = cli()
+
+    title_translator = translator(args.title_replace, args.title_replace_with, args.title_remove)
+    artist_translator = translator(args.artist_replace, args.artist_replace_with, args.artist_remove)
+    album_translator = translator(args.album_replace, args.album_replace_with, args.album_remove)
+    tg = TranslatorGroup(title_translator, artist_translator, album_translator)
+
+    download_lofi(output_dir=args.output, download=args.download is not None, release_numbers=args.download, translators=tg)
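
One idiom in the new manage_info() that is easy to misread: the OrderedDict construction deduplicates the scraped releases by link (and then by name) while keeping the order in which entries first appeared. A minimal, self-contained sketch with made-up values:

    from collections import OrderedDict, namedtuple

    Release = namedtuple("Release", ("name", "link", "artists"))
    scraped = [
        Release("Example Album", "/releases/example-album", "Some Artist"),
        Release("Another Album", "/releases/another-album", "Other Artist"),
        Release("Example Album", "/releases/example-album", "Some Artist"),  # listed twice on the page
    ]
    # duplicate keys collapse into one entry at the position of the first occurrence
    unique = list(OrderedDict(((r.link, r) for r in scraped)).values())
    # unique -> [Release('Example Album', ...), Release('Another Album', ...)]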
