Skip to content

Commit

Permalink
added input cookie file
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryu1845 committed Jan 13, 2022
1 parent dd46fdc commit 4f4c693
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 69 deletions.
37 changes: 20 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,42 +70,45 @@ twspace_dl -i space_url
Here's the output of the help option

```txt
usage: twspace_dl [-h] [-t THREADS] [-v] [-s] [-k] [-i SPACE_URL] [-U USER_URL] [-M PATH] [-d DYN_URL] [-f URL]
[-o FORMAT_STR] [-m] [-p] [-u]
usage: twspace_dl [-h] [-v] [-s] [-k] [--input-cookie-file COOKIE_FILE] [-i SPACE_URL | -U USER_URL] [-d DYN_URL] [-f URL] [-M PATH] [-o FORMAT_STR] [-m] [-p] [-u] [--write-url URL_OUTPUT] {login} ...
Script designed to help download twitter spaces
optional arguments:
positional arguments:
{login} (EXPERIMENTAL) Login to your account using username and password
options:
-h, --help show this help message and exit
-t THREADS, --threads THREADS
number of threads to run the script with(default with max)
-v, --verbose
-s, --skip-download
-k, --keep-files
--input-cookie-file COOKIE_FILE
input:
-i SPACE_URL, --input-url SPACE_URL
-U USER_URL, --user-url USER_URL
-M PATH, --input-metadata PATH
use a metadata json file instead of input url (useful for very old ended spaces)
-d DYN_URL, --from-dynamic-url DYN_URL
use the dynamic url for the processes(useful for ended spaces) example: https://prod-
fastly-ap-
northeast-1.video.pscp.tv/Transcoding/v1/hls/zUUpEgiM0M18jCGxo2eSZs99p49hfyFQr1l4cdze-
Sp4T-DQOMMoZpkbdyetgfwscfvvUkAdeF-I5hPI4bGoYg/non_transcode/ap-northeast-1/periscope-
replay-direct-prod-ap-northeast-1-public/audio-space/dynamic_playlist.m3u8?type=live
use the dynamic url for the processes(useful for ended spaces)
example: https://prod-fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/
zUUpEgiM0M18jCGxo2eSZs99p49hfyFQr1l4cdze-Sp4T-DQOMMoZpkbdyetgfwscfvvUkAdeF-I5hPI4bGoYg/
non_transcode/ap-northeast-1/periscope-replay-direct-prod-ap-northeast-1-public/
audio-space/dynamic_playlist.m3u8?type=live
-f URL, --from-master-url URL
use the master url for the processes(useful for ended spaces) example: https://prod-
fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/YRSsw6_P5xUZHMualK5-ihvePR6o4QmoZV
OBGicKvmkL_KB9IQYtxVqm3P_vpZ2HnFkoRfar4_uJOjqC8OCo5A/non_transcode/ap-
northeast-1/periscope-replay-direct-prod-ap-northeast-1-public/audio-
space/master_playlist.m3u8
use the master url for the processes(useful for ended spaces)
example: https://prod-fastly-ap-northeast-1.video.pscp.tv/Transcoding/v1/hls/
YRSsw6_P5xUZHMualK5-ihvePR6o4QmoZVOBGicKvmkL_KB9IQYtxVqm3P_vpZ2HnFkoRfar4_uJOjqC8OCo5A/
non_transcode/ap-northeast-1/periscope-replay-direct-prod-ap-
northeast-1-public/audio-space/master_playlist.m3u8
-M PATH, --input-metadata PATH
use a metadata json file instead of input url (useful for very old ended spaces)
output:
-o FORMAT_STR, --output FORMAT_STR
-m, --write-metadata write the full metadata json to a file
-p, --write-playlist write the m3u8 used to download the stream(e.g. if you want to use another downloader)
-u, --url display the master url
--write-url URL_OUTPUT
write master url to file
```

## Format
Expand Down
3 changes: 2 additions & 1 deletion twspace_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .format_info import FormatInfo
from .twspace_dl import TwspaceDL
from .login import Login, load_from_file, write_to_file

__all__ = ["FormatInfo", "TwspaceDL"]
__all__ = ["FormatInfo", "TwspaceDL", "Login", "load_from_file", "write_to_file"]
74 changes: 52 additions & 22 deletions twspace_dl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,34 @@
import sys

from twspace_dl.twspace_dl import TwspaceDL
from twspace_dl.login import Login, load_from_file, write_to_file


def get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Script designed to help download twitter spaces"
)
subparsers = parser.add_subparsers(
help="(EXPERIMENTAL) Login to your account using username and password"
)
login_parser = subparsers.add_parser("login", description="EXPERIMENTAL")
input_group = parser.add_argument_group("input")
input_method = input_group.add_mutually_exclusive_group()
output_group = parser.add_argument_group("output")

parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-s", "--skip-download", action="store_true")
parser.add_argument("-k", "--keep-files", action="store_true")
parser.add_argument("--username", type=str, metavar="USERNAME")
parser.add_argument("--password", type=str, metavar="PASSWORD")
parser.add_argument("--input-cookie-file", type=str, metavar="COOKIE_FILE")

input_group.add_argument("-i", "--input-url", type=str, metavar="SPACE_URL")
input_group.add_argument("-U", "--user-url", type=str, metavar="USER_URL")
input_group.add_argument(
"-M",
"--input-metadata",
type=str,
metavar="PATH",
help=(
"use a metadata json file instead of input url\n"
"(useful for very old ended spaces)"
),
login_parser.add_argument("-u", "--username", type=str, metavar="USERNAME")
login_parser.add_argument("-p", "--password", type=str, metavar="PASSWORD")
login_parser.add_argument(
"-o", "--output-cookie-file", type=str, metavar="OUTPUT_COOKIE_FILE"
)

input_method.add_argument("-i", "--input-url", type=str, metavar="SPACE_URL")
input_method.add_argument("-U", "--user-url", type=str, metavar="USER_URL")
input_group.add_argument(
"-d",
"--from-dynamic-url",
Expand All @@ -60,6 +61,16 @@ def get_args() -> argparse.Namespace:
"audio-space/master_playlist.m3u8"
),
)
input_group.add_argument(
"-M",
"--input-metadata",
type=str,
metavar="PATH",
help=(
"use a metadata json file instead of input url\n"
"(useful for very old ended spaces)"
),
)

output_group.add_argument(
"-o",
Expand Down Expand Up @@ -88,7 +99,6 @@ def get_args() -> argparse.Namespace:
output_group.add_argument(
"--write-url", type=str, metavar="URL_OUTPUT", help="write master url to file"
)

if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
Expand All @@ -98,23 +108,43 @@ def get_args() -> argparse.Namespace:

def main() -> None:
args = get_args()
if (
not args.input_url
and not args.input_metadata
and not args.user_url
and not args.from_master_url
has_input = (
args.input_url
or args.input_metadata
or args.user_url
or args.from_master_url
or args.from_dynamic_url
)
has_partial_login = (
args.username or args.password or args.output_cookie_file # has at least one
) and not (
args.username and args.password and args.output_cookie_file # has both
) # has one but not both

if has_partial_login:
print("login needs both username, password, and output file")
sys.exit(2)

if not has_input and not (
args.username or args.password or args.output_cookie_file
):
print("Either space url, user url or master url should be provided")
sys.exit(1)
sys.exit(2)

logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

if args.username and args.password and args.output_cookie_file:
login = Login(args.username, args.password, TwspaceDL.guest_token())
auth_token = login.login()
write_to_file(auth_token, args.output_cookie_file)

if args.input_url:
twspace_dl = TwspaceDL.from_space_url(args.input_url, args.output)
elif args.user_url:
if args.username and args.password:
if args.input_cookie_file:
auth_token = load_from_file(args.input_cookie_file)
twspace_dl = TwspaceDL.from_user_avatar(
args.user_url, args.output, args.username, args.password
args.user_url, args.output, auth_token
)
else:
twspace_dl = TwspaceDL.from_user_tweets(args.user_url, args.output)
Expand Down
54 changes: 37 additions & 17 deletions twspace_dl/login.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
"Module providing login utilities for twspace_dl"
import re

import requests
from typing import Optional


def load_from_file(filename: str) -> str:
    """Return the auth_token stored in a cookie file.

    The file is read as UTF-8 text and searched for the literal
    ``auth_token`` followed by any 8 characters; the 33 characters that
    come right after are returned. ``.`` does not match a newline, so the
    whole entry has to sit on a single line.

    Args:
        filename: path of the cookie file to read.

    Returns:
        The first matching 33-character token found in the file.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if no auth_token entry is found in the file.
    """
    # Use a context manager so the handle is closed even if reading fails;
    # the previous bare ``open(...).read()`` left that to the garbage collector.
    with open(filename, "r", encoding="utf-8") as cookie_file:
        content = cookie_file.read()

    match = re.search(r"(?<=auth_token.{8}).{33}", content)
    if match is None:
        # Fail with an explicit message instead of an opaque IndexError
        # from indexing an empty ``findall`` result.
        raise ValueError(f"no auth_token found in cookie file: {filename}")
    return match.group(0)


def write_to_file(auth_token: str, filename: str) -> None:
    """Save ``auth_token`` to ``filename`` as a single cookie entry.

    The file is created or truncated and written as UTF-8. Its whole
    content is the literal ``auth_token``, eight spaces, then the token
    itself, with no trailing newline.

    Args:
        auth_token: token value to store.
        filename: path of the file to write.
    """
    with open(filename, mode="w", encoding="utf-8") as handle:
        # Key, eight-space separator, value: all on one line.
        entry = "".join(("auth_token", " " * 8, auth_token))
        handle.write(entry)


class Login:
Expand All @@ -11,7 +27,7 @@ def __init__(self, username, password, guest_token):
self.task_url = "https://twitter.com/i/api/1.1/onboarding/task.json"
self.flow_token: str

def login(self) -> Optional[str]:
def login(self) -> str:
request_flow = self.session.post(
self.task_url,
params={"flow_name": "login"},
Expand All @@ -20,49 +36,53 @@ def login(self) -> Optional[str]:
)
try:
self.flow_token = request_flow.json()["flow_token"]
except KeyError:
print("Error while intiial_params:", request_flow.json())
return None
except KeyError as err:
raise RuntimeError(
"Error while initiating parameters:", request_flow.json()
) from err

# js instrumentation subtask
request_flow = self.session.post(
self.task_url, headers=self._headers, json=self._js_instrumentation_data
)
try:
self.flow_token = request_flow.json()["flow_token"]
except KeyError:
print("Error while task0:", request_flow.json())
return None
except KeyError as err:
raise RuntimeError(
"Error while performing js instrumentation:", request_flow.json()
) from err

# user identifier sso subtask
request_flow = self.session.post(
self.task_url, headers=self._headers, json=self._user_identifier_sso_data
)
try:
self.flow_token = request_flow.json()["flow_token"]
except KeyError:
print("Error while task1:", request_flow.json())
return None
except KeyError as err:
raise RuntimeError("Error identifying user:", request_flow.json()) from err

# account duplication check
request_flow = self.session.post(
self.task_url, headers=self._headers, json=self._account_dup_check_data
)
try:
self.flow_token = request_flow.json()["flow_token"]
except KeyError:
print("Error while task2:", request_flow.json())
return None
except KeyError as err:
raise RuntimeError(
"Error while checking account duplication:", request_flow.json()
) from err

# enter password
request_flow = self.session.post(
self.task_url, headers=self._headers, json=self._enter_password_data
)
try:
auth_token = str(request_flow.cookies["auth_token"])
except KeyError:
print("Error while task6:", request_flow.json())
return None
except KeyError as err:
raise RuntimeError(
"Error while while entering password:", request_flow.json()
) from err

return auth_token

@property
Expand Down
28 changes: 16 additions & 12 deletions twspace_dl/twspace_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
import shutil
import subprocess
import tempfile
import time

from functools import cached_property
from urllib.parse import urlparse

import requests

from .format_info import FormatInfo
from .login import Login


class TwspaceDL:
Expand All @@ -22,10 +20,12 @@ class TwspaceDL:
def __init__(self, space_id: str, format_str: str) -> None:
self.id = space_id
self.format_str = format_str or FormatInfo.DEFAULT_FNAME_FORMAT
self.session = requests.Session()
self._tmpdir: str

@classmethod
def from_space_url(cls, url: str, format_str: str):
"""Create a TwspaceDL object from a space url"""
if not url:
logging.warning("No space url given, file won't have any metadata")
space_id = "no_id"
Expand All @@ -36,6 +36,8 @@ def from_space_url(cls, url: str, format_str: str):

@classmethod
def from_user_tweets(cls, url: str, format_str: str):
"""Create a TwspaceDL object from the first space
found in the 20 last user tweets"""
user_id = TwspaceDL.user_id(url)
headers = {
"authorization": (
Expand Down Expand Up @@ -77,9 +79,8 @@ def from_user_tweets(cls, url: str, format_str: str):
return cls(space_id, format_str)

@classmethod
def from_user_avatar(cls, user_url, format_str, username, password):
login = Login(username, password, TwspaceDL.guest_token())
auth_token = login.login()
def from_user_avatar(cls, user_url, format_str, auth_token):
"""Create a TwspaceDL object from a twitter user ongoing space"""
headers = {
"authorization": (
"Bearer "
Expand All @@ -89,19 +90,21 @@ def from_user_avatar(cls, user_url, format_str, username, password):
"cookie": f"auth_token={auth_token};",
}
user_id = TwspaceDL.user_id(user_url)
r = requests.get(
f"https://twitter.com/i/api/fleets/v1/avatar_content?user_ids={user_id}&only_spaces=true",
params = {"user_ids": user_id, "only_spaces": "true"}
avatar_content = requests.get(
f"https://twitter.com/i/api/fleets/v1/avatar_content",
params=params,
headers=headers,
)
).json()

obj = r.json()
broadcast_id = obj["users"][user_id]["spaces"]["live_content"]["audiospace"][
"broadcast_id"
]
broadcast_id = avatar_content["users"][user_id]["spaces"]["live_content"][
"audiospace"
]["broadcast_id"]
return cls(broadcast_id, format_str)

@staticmethod
def user_id(user_url: str) -> str:
"""Get the id of a twitter using the url linking to their account"""
screen_name = re.findall(r"(?<=twitter.com/)\w*", user_url)[0]

params = {
Expand Down Expand Up @@ -133,6 +136,7 @@ def user_id(user_url: str) -> str:

@staticmethod
def guest_token() -> str:
"""Generate a guest token to authorize twitter api requests"""
headers = {
"authorization": (
"Bearer "
Expand Down

0 comments on commit 4f4c693

Please sign in to comment.