From 4f4c6936a77f0cc041ad6fc84fa696bec5033ab6 Mon Sep 17 00:00:00 2001 From: Ryu18 Date: Thu, 13 Jan 2022 22:37:47 +0100 Subject: [PATCH] added input cookie file --- README.md | 37 +++++++++++--------- twspace_dl/__init__.py | 3 +- twspace_dl/__main__.py | 74 ++++++++++++++++++++++++++++------------ twspace_dl/login.py | 54 ++++++++++++++++++++--------- twspace_dl/twspace_dl.py | 28 ++++++++------- 5 files changed, 127 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 426461e..97bd3bf 100644 --- a/README.md +++ b/README.md @@ -70,42 +70,45 @@ twspace_dl -i space_url Here's the output of the help option ```txt -usage: twspace_dl [-h] [-t THREADS] [-v] [-s] [-k] [-i SPACE_URL] [-U USER_URL] [-M PATH] [-d DYN_URL] [-f URL] - [-o FORMAT_STR] [-m] [-p] [-u] +usage: twspace_dl [-h] [-v] [-s] [-k] [--input-cookie-file COOKIE_FILE] [-i SPACE_URL | -U USER_URL] [-d DYN_URL] [-f URL] [-M PATH] [-o FORMAT_STR] [-m] [-p] [-u] [--write-url URL_OUTPUT] {login} ... Script designed to help download twitter spaces -optional arguments: +positional arguments: + {login} (EXPERIMENTAL) Login to your account using username and password + +options: -h, --help show this help message and exit - -t THREADS, --threads THREADS - number of threads to run the script with(default with max) -v, --verbose -s, --skip-download -k, --keep-files + --input-cookie-file COOKIE_FILE input: -i SPACE_URL, --input-url SPACE_URL -U USER_URL, --user-url USER_URL - -M PATH, --input-metadata PATH - use a metadata json file instead of input url (useful for very old ended spaces) -d DYN_URL, --from-dynamic-url DYN_URL - use the dynamic url for the processes(useful for ended spaces) example: https://prod- - fastly-ap- - northeast-1.video.pscp.tv/Transcoding/v1/hls/zUUpEgiM0M18jCGxo2eSZs99p49hfyFQr1l4cdze- - Sp4T-DQOMMoZpkbdyetgfwscfvvUkAdeF-I5hPI4bGoYg/non_transcode/ap-northeast-1/periscope- - replay-direct-prod-ap-northeast-1-public/audio-space/dynamic_playlist.m3u8?type=live + use the 
dynamic url for the processes(useful for ended spaces) + example: https://prod-fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/ + zUUpEgiM0M18jCGxo2eSZs99p49hfyFQr1l4cdze-Sp4T-DQOMMoZpkbdyetgfwscfvvUkAdeF-I5hPI4bGoYg/ + non_transcode/ap-northeast-1/periscope-replay-direct-prod-ap-northeast-1-public/ + audio-space/dynamic_playlist.m3u8?type=live -f URL, --from-master-url URL - use the master url for the processes(useful for ended spaces) example: https://prod- - fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/YRSsw6_P5xUZHMualK5-ihvePR6o4QmoZV - OBGicKvmkL_KB9IQYtxVqm3P_vpZ2HnFkoRfar4_uJOjqC8OCo5A/non_transcode/ap- - northeast-1/periscope-replay-direct-prod-ap-northeast-1-public/audio- - space/master_playlist.m3u8 + use the master url for the processes(useful for ended spaces) + example: https://prod-fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/ + YRSsw6_P5xUZHMualK5-ihvePR6o4QmoZVOBGicKvmkL_KB9IQYtxVqm3P_vpZ2HnFkoRfar4_uJOjqC8OCo5A/ + non_transcode/ap-northeast-1/periscope-replay-direct-prod-ap- + northeast-1-public/audio-space/master_playlist.m3u8 + -M PATH, --input-metadata PATH + use a metadata json file instead of input url (useful for very old ended spaces) output: -o FORMAT_STR, --output FORMAT_STR -m, --write-metadata write the full metadata json to a file -p, --write-playlist write the m3u8 used to download the stream(e.g. 
if you want to use another downloader) -u, --url display the master url + --write-url URL_OUTPUT + write master url to file ``` ## Format diff --git a/twspace_dl/__init__.py b/twspace_dl/__init__.py index 60c8321..28efe3f 100644 --- a/twspace_dl/__init__.py +++ b/twspace_dl/__init__.py @@ -1,4 +1,5 @@ from .format_info import FormatInfo from .twspace_dl import TwspaceDL +from .login import Login, load_from_file, write_to_file -__all__ = ["FormatInfo", "TwspaceDL"] +__all__ = ["FormatInfo", "TwspaceDL", "Login", "load_from_file", "write_to_file"] diff --git a/twspace_dl/__main__.py b/twspace_dl/__main__.py index f748207..88b9a37 100644 --- a/twspace_dl/__main__.py +++ b/twspace_dl/__main__.py @@ -7,33 +7,34 @@ import sys from twspace_dl.twspace_dl import TwspaceDL +from twspace_dl.login import Login, load_from_file, write_to_file def get_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Script designed to help download twitter spaces" ) + subparsers = parser.add_subparsers( + help="(EXPERIMENTAL) Login to your account using username and password" + ) + login_parser = subparsers.add_parser("login", description="EXPERIMENTAL") input_group = parser.add_argument_group("input") + input_method = input_group.add_mutually_exclusive_group() output_group = parser.add_argument_group("output") parser.add_argument("-v", "--verbose", action="store_true") parser.add_argument("-s", "--skip-download", action="store_true") parser.add_argument("-k", "--keep-files", action="store_true") - parser.add_argument("--username", type=str, metavar="USERNAME") - parser.add_argument("--password", type=str, metavar="PASSWORD") + parser.add_argument("--input-cookie-file", type=str, metavar="COOKIE_FILE") - input_group.add_argument("-i", "--input-url", type=str, metavar="SPACE_URL") - input_group.add_argument("-U", "--user-url", type=str, metavar="USER_URL") - input_group.add_argument( - "-M", - "--input-metadata", - type=str, - metavar="PATH", - help=( - "use a metadata 
json file instead of input url\n" - "(useful for very old ended spaces)" - ), + login_parser.add_argument("-u", "--username", type=str, metavar="USERNAME") + login_parser.add_argument("-p", "--password", type=str, metavar="PASSWORD") + login_parser.add_argument( + "-o", "--output-cookie-file", type=str, metavar="OUTPUT_COOKIE_FILE" ) + + input_method.add_argument("-i", "--input-url", type=str, metavar="SPACE_URL") + input_method.add_argument("-U", "--user-url", type=str, metavar="USER_URL") input_group.add_argument( "-d", "--from-dynamic-url", @@ -60,6 +61,16 @@ def get_args() -> argparse.Namespace: "audio-space/master_playlist.m3u8" ), ) + input_group.add_argument( + "-M", + "--input-metadata", + type=str, + metavar="PATH", + help=( + "use a metadata json file instead of input url\n" + "(useful for very old ended spaces)" + ), + ) output_group.add_argument( "-o", @@ -88,7 +99,6 @@ def get_args() -> argparse.Namespace: output_group.add_argument( "--write-url", type=str, metavar="URL_OUTPUT", help="write master url to file" ) - if len(sys.argv) == 1: parser.print_help(sys.stderr) sys.exit(1) @@ -98,23 +108,43 @@ def get_args() -> argparse.Namespace: def main() -> None: args = get_args() - if ( - not args.input_url - and not args.input_metadata - and not args.user_url - and not args.from_master_url + has_input = ( + args.input_url + or args.input_metadata + or args.user_url + or args.from_master_url + or args.from_dynamic_url + ) + has_partial_login = ( + args.username or args.password or args.output_cookie_file # has at least one + ) and not ( + args.username and args.password and args.output_cookie_file # has both + ) # has one but not both + + if has_partial_login: + print("login needs both username, password, and output file") + sys.exit(2) + + if not has_input and not ( + args.username or args.password or args.output_cookie_file ): print("Either space url, user url or master url should be provided") - sys.exit(1) + sys.exit(2) 
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO) + if args.username and args.password and args.output_cookie_file: + login = Login(args.username, args.password, TwspaceDL.guest_token()) + auth_token = login.login() + write_to_file(auth_token, args.output_cookie_file) + if args.input_url: twspace_dl = TwspaceDL.from_space_url(args.input_url, args.output) elif args.user_url: - if args.username and args.password: + if args.input_cookie_file: + auth_token = load_from_file(args.input_cookie_file) twspace_dl = TwspaceDL.from_user_avatar( - args.user_url, args.output, args.username, args.password + args.user_url, args.output, auth_token ) else: twspace_dl = TwspaceDL.from_user_tweets(args.user_url, args.output) diff --git a/twspace_dl/login.py b/twspace_dl/login.py index bd5c2b3..6e64a84 100644 --- a/twspace_dl/login.py +++ b/twspace_dl/login.py @@ -1,5 +1,21 @@ +"Module providing login utilities for twspace_dl" +import re + import requests -from typing import Optional + + +def load_from_file(filename: str) -> str: + """return auth_token from netscape cookie file""" + return re.findall( + r"(?<=auth_token.{8}).{33}", open(filename, "r", encoding="utf-8").read() + )[0] + + +def write_to_file(auth_token: str, filename: str) -> None: + """Write cookie to file in a format recognizable by the module + (`auth_token {auth_token}`)""" + with open(filename, "w", encoding="utf-8") as cookie_file: + cookie_file.write("auth_token" + 8 * " " + auth_token) class Login: @@ -11,7 +27,7 @@ def __init__(self, username, password, guest_token): self.task_url = "https://twitter.com/i/api/1.1/onboarding/task.json" self.flow_token: str - def login(self) -> Optional[str]: + def login(self) -> str: request_flow = self.session.post( self.task_url, params={"flow_name": "login"}, @@ -20,9 +36,10 @@ def login(self) -> Optional[str]: ) try: self.flow_token = request_flow.json()["flow_token"] - except KeyError: - print("Error while intiial_params:", request_flow.json()) - 
return None + except KeyError as err: + raise RuntimeError( + "Error while initiating parameters:", request_flow.json() + ) from err # js instrumentation subtask request_flow = self.session.post( @@ -30,9 +47,10 @@ def login(self) -> Optional[str]: ) try: self.flow_token = request_flow.json()["flow_token"] - except KeyError: - print("Error while task0:", request_flow.json()) - return None + except KeyError as err: + raise RuntimeError( + "Error while performing js instrumentation:", request_flow.json() + ) from err # user identifier sso subtask request_flow = self.session.post( @@ -40,9 +58,8 @@ def login(self) -> Optional[str]: ) try: self.flow_token = request_flow.json()["flow_token"] - except KeyError: - print("Error while task1:", request_flow.json()) - return None + except KeyError as err: + raise RuntimeError("Error identifying user:", request_flow.json()) from err # account duplication check request_flow = self.session.post( @@ -50,9 +67,10 @@ def login(self) -> Optional[str]: ) try: self.flow_token = request_flow.json()["flow_token"] - except KeyError: - print("Error while task2:", request_flow.json()) - return None + except KeyError as err: + raise RuntimeError( + "Error while checking account duplication:", request_flow.json() + ) from err # enter password request_flow = self.session.post( @@ -60,9 +78,11 @@ def login(self) -> Optional[str]: ) try: auth_token = str(request_flow.cookies["auth_token"]) - except KeyError: - print("Error while task6:", request_flow.json()) - return None + except KeyError as err: + raise RuntimeError( + "Error while while entering password:", request_flow.json() + ) from err + return auth_token @property diff --git a/twspace_dl/twspace_dl.py b/twspace_dl/twspace_dl.py index cd7bac3..0e686a1 100644 --- a/twspace_dl/twspace_dl.py +++ b/twspace_dl/twspace_dl.py @@ -5,7 +5,6 @@ import shutil import subprocess import tempfile -import time from functools import cached_property from urllib.parse import urlparse @@ -13,7 +12,6 @@ 
import requests from .format_info import FormatInfo -from .login import Login class TwspaceDL: @@ -22,10 +20,12 @@ class TwspaceDL: def __init__(self, space_id: str, format_str: str) -> None: self.id = space_id self.format_str = format_str or FormatInfo.DEFAULT_FNAME_FORMAT + self.session = requests.Session() self._tmpdir: str @classmethod def from_space_url(cls, url: str, format_str: str): + """Create a TwspaceDL object from a space url""" if not url: logging.warning("No space url given, file won't have any metadata") space_id = "no_id" @@ -36,6 +36,8 @@ def from_space_url(cls, url: str, format_str: str): @classmethod def from_user_tweets(cls, url: str, format_str: str): + """Create a TwspaceDL object from the first space + found in the 20 last user tweets""" user_id = TwspaceDL.user_id(url) headers = { "authorization": ( @@ -77,9 +79,8 @@ def from_user_tweets(cls, url: str, format_str: str): return cls(space_id, format_str) @classmethod - def from_user_avatar(cls, user_url, format_str, username, password): - login = Login(username, password, TwspaceDL.guest_token()) - auth_token = login.login() + def from_user_avatar(cls, user_url, format_str, auth_token): + """Create a TwspaceDL object from a twitter user ongoing space""" headers = { "authorization": ( "Bearer " @@ -89,19 +90,21 @@ def from_user_avatar(cls, user_url, format_str, username, password): "cookie": f"auth_token={auth_token};", } user_id = TwspaceDL.user_id(user_url) - r = requests.get( - f"https://twitter.com/i/api/fleets/v1/avatar_content?user_ids={user_id}&only_spaces=true", + params = {"user_ids": user_id, "only_spaces": "true"} + avatar_content = requests.get( + f"https://twitter.com/i/api/fleets/v1/avatar_content", + params=params, headers=headers, - ) + ).json() - obj = r.json() - broadcast_id = obj["users"][user_id]["spaces"]["live_content"]["audiospace"][ - "broadcast_id" - ] + broadcast_id = avatar_content["users"][user_id]["spaces"]["live_content"][ + "audiospace" + ]["broadcast_id"] return 
cls(broadcast_id, format_str) @staticmethod def user_id(user_url: str) -> str: + """Get the id of a twitter user using the url linking to their account""" screen_name = re.findall(r"(?<=twitter.com/)\w*", user_url)[0] params = { @@ -133,6 +136,7 @@ def user_id(user_url: str) -> str: @staticmethod def guest_token() -> str: + """Generate a guest token to authorize twitter api requests""" headers = { "authorization": ( "Bearer "