diff --git a/lib/yt_dlp/YoutubeDL.py b/lib/yt_dlp/YoutubeDL.py
index 1feed3052..f322b12a2 100644
--- a/lib/yt_dlp/YoutubeDL.py
+++ b/lib/yt_dlp/YoutubeDL.py
@@ -60,7 +60,7 @@
     get_postprocessor,
 )
 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
-from .update import REPOSITORY, current_git_head, detect_variant
+from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
 from .utils import (
     DEFAULT_OUTTMPL,
     IDENTITY,
@@ -239,9 +239,9 @@ class YoutubeDL:
                        'selected' (check selected formats),
                        or None (check only if requested by extractor)
     paths:             Dictionary of output paths. The allowed keys are 'home'
-                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
     outtmpl:           Dictionary of templates for output names. Allowed keys
-                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                        For compatibility with youtube-dl, a single string can also be used
     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
     restrictfilenames: Do not allow "&" and spaces in file names
@@ -422,7 +422,7 @@ class YoutubeDL:
                        asked whether to download the video.
                        - Raise utils.DownloadCancelled(msg) to abort remaining
                          downloads when a video is rejected.
-                       match_filter_func in utils.py is one example for this.
+                       match_filter_func in utils/_utils.py is one example for this.
     color:             A Dictionary with output stream names as keys
                        and their respective color policy as values.
                        Can also just be a single color policy,
@@ -640,17 +640,9 @@ def process_color_policy(stream):
             for name, stream in self._out_files.items_ if name != 'console'
         })

-        # The code is left like this to be reused for future deprecations
-        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
-        current_version = sys.version_info[:2]
-        if current_version < MIN_RECOMMENDED:
-            msg = ('Support for Python version %d.%d has been deprecated. '
-                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
-                   '\n                    You will no longer receive updates on this version')
-            if current_version < MIN_SUPPORTED:
-                msg = 'Python version %d.%d is no longer supported'
-            self.deprecated_feature(
-                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
+        system_deprecation = _get_system_deprecation()
+        if system_deprecation:
+            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

         if self.params.get('allow_unplayable_formats'):
             self.report_warning(
diff --git a/lib/yt_dlp/compat/__init__.py b/lib/yt_dlp/compat/__init__.py
index 832a9138d..5ad5c70ec 100644
--- a/lib/yt_dlp/compat/__init__.py
+++ b/lib/yt_dlp/compat/__init__.py
@@ -30,7 +30,7 @@ def compat_etree_fromstring(text):
 if compat_os_name == 'nt':
     def compat_shlex_quote(s):
         import re
-        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+        return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
 else:
     from shlex import quote as compat_shlex_quote  # noqa: F401
diff --git a/lib/yt_dlp/compat/urllib/__init__.py b/lib/yt_dlp/compat/urllib/__init__.py
index b27cc6133..9084b3c2b 100644
--- a/lib/yt_dlp/compat/urllib/__init__.py
+++ b/lib/yt_dlp/compat/urllib/__init__.py
@@ -1,7 +1,7 @@
 # flake8: noqa: F405
 from urllib import *  # noqa: F403

-del request
+del request  # noqa: F821
 from . import request  # noqa: F401

 from ..compat_utils import passthrough_module
diff --git a/lib/yt_dlp/extractor/_extractors.py b/lib/yt_dlp/extractor/_extractors.py
index 9cda06d8f..908abb8ac 100644
--- a/lib/yt_dlp/extractor/_extractors.py
+++ b/lib/yt_dlp/extractor/_extractors.py
@@ -137,10 +137,6 @@
     ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
-from .asiancrush import (
-    AsianCrushIE,
-    AsianCrushPlaylistIE,
-)
 from .atresplayer import AtresPlayerIE
 from .atscaleconf import AtScaleConfEventIE
 from .atttechchannel import ATTTechChannelIE
@@ -275,6 +271,10 @@
     BrightcoveLegacyIE,
     BrightcoveNewIE,
 )
+from .brilliantpala import (
+    BrilliantpalaElearnIE,
+    BrilliantpalaClassesIE,
+)
 from .businessinsider import BusinessInsiderIE
 from .bundesliga import BundesligaIE
 from .buzzfeed import BuzzFeedIE
@@ -358,6 +358,10 @@
 from .cinchcast import CinchcastIE
 from .cinemax import CinemaxIE
 from .cinetecamilano import CinetecaMilanoIE
+from .cineverse import (
+    CineverseIE,
+    CineverseDetailsIE,
+)
 from .ciscolive import (
     CiscoLiveSessionIE,
     CiscoLiveSearchIE,
@@ -1126,6 +1130,7 @@
     MofosexEmbedIE,
 )
 from .mojvideo import MojvideoIE
+from .monstercat import MonstercatIE
 from .morningstar import MorningstarIE
 from .motherless import (
     MotherlessIE,
@@ -1298,6 +1303,11 @@
     NineCNineMediaIE,
     CPTwentyFourIE,
 )
+from .niconicochannelplus import (
+    NiconicoChannelPlusIE,
+    NiconicoChannelPlusChannelVideosIE,
+    NiconicoChannelPlusChannelLivesIE,
+)
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
@@ -1451,6 +1461,7 @@
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .piapro import PiaproIE
+from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
     PicartoIE,
     PicartoVodIE,
diff --git a/lib/yt_dlp/extractor/abc.py b/lib/yt_dlp/extractor/abc.py
index f56133eb3..d2cf5f7c5 100644
--- a/lib/yt_dlp/extractor/abc.py
+++ b/lib/yt_dlp/extractor/abc.py
@@ -180,7 +180,6 @@ class ABCIViewIE(InfoExtractor):
     _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
     _GEO_COUNTRIES = ['AU']

-    # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
         'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
         'md5': '67715ce3c78426b11ba167d875ac6abf',
diff --git a/lib/yt_dlp/extractor/arte.py b/lib/yt_dlp/extractor/arte.py
index e3cc5afb0..a19cd2a3a 100644
--- a/lib/yt_dlp/extractor/arte.py
+++ b/lib/yt_dlp/extractor/arte.py
@@ -169,7 +169,7 @@ def _real_extract(self, url):
             )))
             short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')

-            if stream['protocol'].startswith('HLS'):
+            if 'HLS' in stream['protocol']:
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
                 for fmt in fmts:
diff --git a/lib/yt_dlp/extractor/asiancrush.py b/lib/yt_dlp/extractor/asiancrush.py
deleted file mode 100644
index 23f310edb..000000000
--- a/lib/yt_dlp/extractor/asiancrush.py
+++ /dev/null
@@ -1,196 +0,0 @@
-import functools
-import re
-
-from .common import InfoExtractor
-from .kaltura import KalturaIE
-from ..utils import (
-    extract_attributes,
-    int_or_none,
-    OnDemandPagedList,
-    parse_age_limit,
-    strip_or_none,
-    try_get,
-)
-
-
-class AsianCrushBaseIE(InfoExtractor):
-    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
-    _KALTURA_KEYS = [
-        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
-        'widescreen_thumbnail_url', 'screencap_widescreen',
-    ]
-    _API_SUFFIX = {'retrocrush.tv': '-ott'}
-
-    def _call_api(self, host, endpoint, video_id, query, resource):
-        return self._download_json(
-            'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
-            'Downloading %s JSON metadata' % resource, query=query,
-            headers=self.geo_verification_headers())['objects']
-
-    def _download_object_data(self, host, object_id, resource):
-        return self._call_api(
-            host, 'search', object_id, {'id': object_id}, resource)[0]
-
-    def _get_object_description(self, obj):
-        return strip_or_none(obj.get('long_description') or obj.get('short_description'))
-
-    def _parse_video_data(self, video):
-        title = video['name']
-
-        entry_id, partner_id = [None] * 2
-        for k in self._KALTURA_KEYS:
-            k_url = video.get(k)
-            if k_url:
-                mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
-                if mobj:
-                    partner_id, entry_id = mobj.groups()
-                    break
-
-        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
-        categories = list(filter(None, [c.get('name') for c in meta_categories]))
-
-        show_info = video.get('show_info') or {}
-
-        return {
-            '_type': 'url_transparent',
-            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
-            'ie_key': KalturaIE.ie_key(),
-            'id': entry_id,
-            'title': title,
-            'description': self._get_object_description(video),
-            'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
-            'categories': categories,
-            'series': show_info.get('show_name'),
-            'season_number': int_or_none(show_info.get('season_num')),
-            'season_id': show_info.get('season_id'),
-            'episode_number': int_or_none(show_info.get('episode_num')),
-        }
-
-
-class AsianCrushIE(AsianCrushBaseIE):
-    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
-    _TESTS = [{
-        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
-        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
-        'info_dict': {
-            'id': '1_y4tmjm5r',
-            'ext': 'mp4',
-            'title': 'Women Who Flirt',
-            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
-            'timestamp': 1496936429,
-            'upload_date': '20170608',
-            'uploader_id': 'craig@crifkin.com',
-            'age_limit': 13,
-            'categories': 'count:5',
-            'duration': 5812,
-        },
-    }, {
-        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        host, video_id = self._match_valid_url(url).groups()
-
-        if host == 'cocoro.tv':
-            webpage = self._download_webpage(url, video_id)
-            embed_vars = self._parse_json(self._search_regex(
-                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
-                default='{}'), video_id, fatal=False) or {}
-            video_id = embed_vars.get('entry_id') or video_id
-
-        video = self._download_object_data(host, video_id, 'video')
-        return self._parse_video_data(video)
-
-
-class AsianCrushPlaylistIE(AsianCrushBaseIE):
-    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
-    _TESTS = [{
-        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
-        'info_dict': {
-            'id': '6447',
-            'title': 'Fruity Samurai',
-            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
-        },
-        'playlist_count': 13,
-    }, {
-        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
-        'only_matching': True,
-    }]
-    _PAGE_SIZE = 1000000000
-
-    def _fetch_page(self, domain, parent_id, page):
-        videos = self._call_api(
-            domain, 'getreferencedobjects', parent_id, {
-                'max': self._PAGE_SIZE,
-                'object_type': 'video',
-                'parent_id': parent_id,
-                'start': page * self._PAGE_SIZE,
-            }, 'page %d' % (page + 1))
-        for video in videos:
-            yield self._parse_video_data(video)
-
-    def _real_extract(self, url):
-        host, playlist_id = self._match_valid_url(url).groups()
-
-        if host == 'cocoro.tv':
-            webpage = self._download_webpage(url, playlist_id)
-
-            entries = []
-
-            for mobj in re.finditer(
-                    r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
-                    webpage):
-                attrs = extract_attributes(mobj.group(0))
-                if attrs.get('class') == 'clearfix':
-                    entries.append(self.url_result(
-                        mobj.group('url'), ie=AsianCrushIE.ie_key()))
-
-            title = self._html_search_regex(
-                r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
-                'title', default=None) or self._og_search_title(
-                webpage, default=None) or self._html_search_meta(
-                'twitter:title', webpage, 'title',
-                default=None) or self._html_extract_title(webpage)
-            if title:
-                title = re.sub(r'\s*\|\s*.+?$', '', title)
-
-            description = self._og_search_description(
-                webpage, default=None) or self._html_search_meta(
-                'twitter:description', webpage, 'description', fatal=False)
-        else:
-            show = self._download_object_data(host, playlist_id, 'show')
-            title = show.get('name')
-            description = self._get_object_description(show)
-            entries = OnDemandPagedList(
-                functools.partial(self._fetch_page, host, playlist_id),
-                self._PAGE_SIZE)
-
-        return self.playlist_result(entries, playlist_id, title, description)
diff --git a/lib/yt_dlp/extractor/brilliantpala.py b/lib/yt_dlp/extractor/brilliantpala.py
new file mode 100644
index 000000000..6fd5b8148
--- /dev/null
+++ b/lib/yt_dlp/extractor/brilliantpala.py
@@ -0,0 +1,127 @@
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    urlencode_postdata,
+)
+
+
+class BrilliantpalaBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'brilliantpala'
+    _DOMAIN = '{subdomain}.brilliantpala.org'
+
+    def _initialize_pre_login(self):
+        self._HOMEPAGE = f'https://{self._DOMAIN}'
+        self._LOGIN_API = f'{self._HOMEPAGE}/login/'
+        self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
+        self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
+        self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
+
+    def _get_logged_in_username(self, url, video_id):
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+        if self._LOGIN_API == urlh.url:
+            self.raise_login_required()
+        return self._html_search_regex(
+            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
+
+    def _perform_login(self, username, password):
+        login_form = self._hidden_inputs(self._download_webpage(
+            self._LOGIN_API, None, 'Downloading login page'))
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+        self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
+
+        logged_page = self._download_webpage(
+            self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
+            data=urlencode_postdata(login_form))
+
+        if self._html_search_regex(
+                r'(Your username / email and password)', logged_page, 'auth fail', default=None):
+            raise ExtractorError('wrong username or password', expected=True)
+
+        # the maximum number of logins is one
+        if self._html_search_regex(
+                r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
+            logout_device_form = self._hidden_inputs(logged_page)
+            self._download_webpage(
+                self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
+                note='Logging out other devices', data=urlencode_postdata(logout_device_form))
+
+    def _real_extract(self, url):
+        course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
+        video_id = f'{course_id}-{content_id}'
+
+        username = self._get_logged_in_username(url, video_id)
+
+        content_json = self._download_json(
+            self._CONTENT_API.format(content_id=content_id), video_id,
+            note='Fetching content info', errnote='Unable to fetch content info')
+
+        entries = []
+        for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
+            formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
+            if not formats:
+                continue
+            entries.append({
+                'id': str(stream['id']),
+                'title': content_json.get('title'),
+                'formats': formats,
+                'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
+                'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
+                'thumbnail': content_json.get('cover_image'),
+            })
+
+        return self.playlist_result(
+            entries, playlist_id=video_id, playlist_title=content_json.get('title'))
+
+
+class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
+    IE_NAME = 'Brilliantpala:Elearn'
+    IE_DESC = 'VoD on elearn.brilliantpala.org'
+    _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
+    _TESTS = [{
+        'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
+        'only_matching': True,
+    }, {
+        'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
+        'info_dict': {
+            'id': '23577',
+            'ext': 'mp4',
+            'title': 'Physical World, Units and Measurements - 1',
+            'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
+            'live_status': 'not_live',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
+
+
+class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
+    IE_NAME = 'Brilliantpala:Classes'
+    IE_DESC = 'VoD on classes.brilliantpala.org'
+    _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
+    _TESTS = [{
+        'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
+        'only_matching': True,
+    }, {
+        'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
+        'info_dict': {
+            'id': '9128',
+            'ext': 'mp4',
+            'title': 'Motion in a Straight Line - Class 1',
+            'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
+            'live_status': 'not_live',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
diff --git a/lib/yt_dlp/extractor/cineverse.py b/lib/yt_dlp/extractor/cineverse.py
new file mode 100644
index 000000000..c9fa789b7
--- /dev/null
+++ b/lib/yt_dlp/extractor/cineverse.py
@@ -0,0 +1,136 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    filter_dict,
+    int_or_none,
+    parse_age_limit,
+    smuggle_url,
+    traverse_obj,
+    unsmuggle_url,
+    url_or_none,
+)
+
+
+class CineverseBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
+        'cineverse.com',
+        'asiancrush.com',
+        'dovechannel.com',
+        'screambox.com',
+        'midnightpulp.com',
+        'fandor.com',
+        'retrocrush.tv',
+    )))
+
+
+class CineverseIE(CineverseBaseIE):
+    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
+        'skip': 'geo-blocked',
+        'info_dict': {
+            'title': 'Women Who Flirt',
+            'ext': 'mp4',
+            'id': 'DMR00018919',
+            'modified_timestamp': 1678744575289,
+            'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
+            'duration': 5811.597,
+            'description': 'md5:892fd62a05611d394141e8394ace0bc6',
+            'age_limit': 13,
+        }
+    }, {
+        'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
+        'skip': 'geo-blocked',
+        'info_dict': {
+            'title': 'Archenemy! Crystal Bowie',
+            'ext': 'mp4',
+            'id': '1000000023016',
+            'episode_number': 3,
+            'season_number': 1,
+            'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
+            'age_limit': 0,
+            'episode': 'Episode 3',
+            'season': 'Season 1',
+            'duration': 1485.067,
+            'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
+            'series': 'Space Adventure COBRA (Original Japanese)',
+        }
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, default={})
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })
+        video_id = self._match_id(url)
+        html = self._download_webpage(url, video_id)
+        idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
+
+        if idetails.get('err_code') == 1200:
+            self.raise_geo_restricted(
+                'This video is not available from your location due to geo restriction. '
+                'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
+                countries=smuggled_data.get('geo_countries'))
+
+        return {
+            'subtitles': filter_dict({
+                'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
+            }),
+            'formats': self._extract_m3u8_formats(idetails['url'], video_id),
+            **traverse_obj(idetails, {
+                'title': 'title',
+                'id': ('details', 'item_id'),
+                'description': ('details', 'description'),
+                'duration': ('duration', {lambda x: x / 1000}),
+                'cast': ('details', 'cast', {lambda x: x.split(', ')}),
+                'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
+                'season_number': ('details', 'season', {int_or_none}),
+                'episode_number': ('details', 'episode', {int_or_none}),
+                'age_limit': ('details', 'rating_code', {parse_age_limit}),
+                'series': ('details', 'series_details', 'title'),
+            }),
+        }
+
+
+class CineverseDetailsIE(CineverseBaseIE):
+    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
+        'playlist_mincount': 30,
+        'info_dict': {
+            'title': 'Space Adventure COBRA (Original Japanese)',
+            'id': '1000000023012',
+        }
+    }, {
+        'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
+        'info_dict': {
+            'id': 'NNVG4938',
+            'ext': 'mp4',
+            'title': 'Hansel and Gretel',
+            'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
+            'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
+            'duration': 7030.732,
+        },
+    }]
+
+    def _real_extract(self, url):
+        host, series_id = self._match_valid_url(url).group('host', 'id')
+        html = self._download_webpage(url, series_id)
+        pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
+
+        geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
+        geoblocked = traverse_obj(pageprops, (
+            'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
+
+        def item_result(item):
+            item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
+            if geoblocked:
+                item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
+            return self.url_result(item_url, CineverseIE)
+
+        season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
+        if season:
+            return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
+                                        playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
+        return item_result(pageprops['itemDetailsData'])
diff --git a/lib/yt_dlp/extractor/ign.py b/lib/yt_dlp/extractor/ign.py
index 64875f8ce..1c4f105e9 100644
--- a/lib/yt_dlp/extractor/ign.py
+++ b/lib/yt_dlp/extractor/ign.py
@@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE):
             'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
             'duration': 298,
             'tags': 'count:13',
-            'display_id': '112203',
-            'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg',
-            'duration': 298,
-            'tags': 'count:13',
         },
         'expected_warnings': ['HTTP Error 400: Bad Request'],
     }, {
diff --git a/lib/yt_dlp/extractor/monstercat.py b/lib/yt_dlp/extractor/monstercat.py
new file mode 100644
index 000000000..7f04825fc
--- /dev/null
+++ b/lib/yt_dlp/extractor/monstercat.py
@@ -0,0 +1,79 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
+    get_element_text_and_html_by_tag,
+    int_or_none,
+    unified_strdate,
+    strip_or_none,
+    traverse_obj,
+    try_call,
+)
+
+
+class MonstercatIE(InfoExtractor):
+    _VALID_URL = r'https://www\.monstercat\.com/release/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.monstercat.com/release/742779548009',
+        'playlist_count': 20,
+        'info_dict': {
+            'title': 'The Secret Language of Trees',
+            'id': '742779548009',
+            'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
+            'release_year': 2023,
+            'release_date': '20230711',
+            'album': 'The Secret Language of Trees',
+            'album_artist': 'BT',
+        }
+    }]
+
+    def _extract_tracks(self, table, album_meta):
+        for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table):  # regex by chatgpt due to lack of get_elements_by_tag
+            title = clean_html(try_call(
+                lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0]))
diff --git a/lib/yt_dlp/extractor/nfl.py b/lib/yt_dlp/extractor/nfl.py
--- a/lib/yt_dlp/extractor/nfl.py
+++ b/lib/yt_dlp/extractor/nfl.py
     _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+});?\s*</script>'
     _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:'

+    _CLIENT_DATA = {
+        'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
+        'clientSecret': 'CZuvCL49d9OwfGsR',
+        'deviceId': str(uuid.uuid4()),
+        'deviceInfo': base64.b64encode(json.dumps({
+            'model': 'desktop',
+            'version': 'Chrome',
+            'osName': 'Windows',
+            'osVersion': '10.0',
+        }, separators=(',', ':')).encode()).decode(),
+        'networkType': 'other',
+        'nflClaimGroupsToAdd': [],
+        'nflClaimGroupsToRemove': [],
+    }
+    _ACCOUNT_INFO = {}
+    _API_KEY = None
+
+    _TOKEN = None
+    _TOKEN_EXPIRY = 0
+
+    def _get_account_info(self, url, slug):
+        if not self._API_KEY:
+            webpage = self._download_webpage(url, slug, fatal=False) or ''
+            self._API_KEY = self._search_regex(
+                r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
+                fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
+
+        cookies = self._get_cookies('https://auth-id.nfl.com/')
+        login_token = traverse_obj(cookies, (
+            (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
+        if not login_token:
+            self.raise_login_required()
+        if 'ucid' not in cookies:
+            raise ExtractorError(
+                'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. '
+                'If using --cookies, these cookies must be exported along with .nfl.com cookies, '
+                'or else try using --cookies-from-browser instead', expected=True)
+
+        account = self._download_json(
+            'https://auth-id.nfl.com/accounts.getAccountInfo', slug,
+            note='Downloading account info', data=urlencode_postdata({
+                'include': 'profile,data',
+                'lang': 'en',
+                'APIKey': self._API_KEY,
+                'sdk': 'js_latest',
+                'login_token': login_token,
+                'authMode': 'cookie',
+                'pageURL': url,
+                'sdkBuild': traverse_obj(cookies, (
+                    'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
+                'format': 'json',
+            }), headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        self._ACCOUNT_INFO = traverse_obj(account, {
+            'signatureTimestamp': 'signatureTimestamp',
+            'uid': 'UID',
+            'uidSignature': 'UIDSignature',
+        })
+
+        if len(self._ACCOUNT_INFO) != 3:
+            raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
+
+    def _get_auth_token(self, url, slug):
+        if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
+            return
+
+        if not self._ACCOUNT_INFO:
+            self._get_account_info(url, slug)
+
+        token = self._download_json(
+            'https://api.nfl.com/identity/v3/token%s' % (
+                '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
+            slug, headers={'Content-Type': 'application/json'}, note='Downloading access token',
+            data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
+
+        self._TOKEN = token['accessToken']
+        self._TOKEN_EXPIRY = token['expiresIn']
+        self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
+
     def _parse_video_config(self, video_config, display_id):
         video_config = self._parse_json(video_config, display_id)
         item = video_config['playlist'][0]
@@ -168,7 +247,7 @@ def _real_extract(self, url):

 class NFLPlusReplayIE(NFLBaseIE):
     IE_NAME = 'nfl.com:plus:replay'
-    _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/[\w-]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/(?P<slug>[\w-]+)(?:/(?P<id>\d+))?'
     _TESTS = [{
         'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108',
         'info_dict': {
@@ -185,23 +264,92 @@ class NFLPlusReplayIE(NFLBaseIE):
             'thumbnail': r're:^https?://.*\.jpg',
         },
         'params': {'skip_download': 'm3u8'},
+    }, {
+        'note': 'Subscription required',
+        'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1',
+        'playlist_count': 4,
+        'info_dict': {
+            'id': 'giants-at-vikings-2022-post-1',
+        },
+    }, {
+        'note': 'Subscription required',
+        'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
+        'playlist_count': 2,
+        'info_dict': {
+            'id': 'giants-at-patriots-2011-pre-4',
+        },
+    }, {
+        'note': 'Subscription required',
+        'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
+        'info_dict': {
+            'id': '950701',
+            'ext': 'mp4',
+            'title': 'Giants @ Patriots',
+            'description': 'Giants at Patriots on September 01, 2011',
+            'uploader': 'NFL',
+            'upload_date': '20210724',
+            'timestamp': 1627085874,
+            'duration': 1532,
+            'categories': ['Game Highlights'],
+            'tags': ['play-by-play'],
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+            'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}},
+        },
     }]

+    _REPLAY_TYPES = {
+        'full_game': 'Full Game',
+        'full_game_spanish': 'Full Game - Spanish',
+        'condensed_game': 'Condensed Game',
+        'all_22': 'All-22',
+    }
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+        slug, video_id = self._match_valid_url(url).group('slug', 'id')
+        requested_types = self._configuration_arg('type', ['all'])
+        if 'all' in requested_types:
+            requested_types = list(self._REPLAY_TYPES.keys())
+        requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
+
+        if not video_id:
+            self._get_auth_token(url, slug)
+            headers = {'Authorization': f'Bearer {self._TOKEN}'}
+            game_id = self._download_json(
+                f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
+                'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id']
+            replays = self._download_json(
+                'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON',
+                query={'gameId': game_id}, headers=headers)
+            if len(requested_types) == 1:
+                video_id = traverse_obj(replays, (
+                    'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
+
+        if video_id:
+            return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+
+        def entries():
+            for replay in traverse_obj(
+                    replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types)
+            ):
+                video_id = replay['mcpPlaybackId']
+                yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
+
+        return self.playlist_result(entries(), slug)

 class NFLPlusEpisodeIE(NFLBaseIE):
     IE_NAME = 'nfl.com:plus:episode'
     _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/episodes/(?P<id>[\w-]+)'
     _TESTS = [{
-        'note': 'premium content',
+        'note': 'Subscription required',
         'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships',
         'info_dict': {
             'id': '1576832',
             'ext': 'mp4',
-            'title': 'Kurt\'s QB Insider: Conference Championships',
+            'title': 'Conference Championships',
             'description': 'md5:944f7fab56f7a37430bf8473f5473857',
             'uploader': 'NFL',
             'upload_date': '20230127',
@@ -214,85 +362,9 @@ class NFLPlusEpisodeIE(NFLBaseIE):
         'params': {'skip_download': 'm3u8'},
     }]

-    _CLIENT_DATA = {
-        'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
-        'clientSecret': 'CZuvCL49d9OwfGsR',
-        'deviceId': str(uuid.uuid4()),
-        'deviceInfo': base64.b64encode(json.dumps({
-            'model': 'desktop',
-            'version': 'Chrome',
-            'osName': 'Windows',
-            'osVersion': '10.0',
-        }, separators=(',', ':')).encode()).decode(),
-        'networkType': 'other',
-        'nflClaimGroupsToAdd': [],
-        'nflClaimGroupsToRemove': [],
-    }
-    _ACCOUNT_INFO = {}
-    _API_KEY = None
-
-    _TOKEN = None
-    _TOKEN_EXPIRY = 0
-
-    def _get_account_info(self, url, video_id):
-        cookies = self._get_cookies('https://www.nfl.com/')
-        login_token = traverse_obj(cookies, (
-            (f'glt_{self._API_KEY}', f'gig_loginToken_{self._API_KEY}',
-             lambda k, _: k.startswith('glt_') or k.startswith('gig_loginToken_')),
-            {lambda x: x.value}), get_all=False)
-        if not login_token:
-            self.raise_login_required()
-
-        account = self._download_json(
-            'https://auth-id.nfl.com/accounts.getAccountInfo', video_id,
-            note='Downloading account info', data=urlencode_postdata({
-                'include': 'profile,data',
-                'lang': 'en',
-                'APIKey': self._API_KEY,
-                'sdk': 'js_latest',
-                'login_token': login_token,
-                'authMode': 'cookie',
-                'pageURL': url,
-                'sdkBuild': traverse_obj(cookies, (
-                    'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='13642'),
-                'format': 'json',
-            }), headers={'Content-Type': 'application/x-www-form-urlencoded'})
-
-        self._ACCOUNT_INFO = traverse_obj(account, {
-            'signatureTimestamp': 'signatureTimestamp',
-            'uid': 'UID',
-            'uidSignature': 'UIDSignature',
-        })
-
-        if len(self._ACCOUNT_INFO) != 3:
-            raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
-
-    def _get_auth_token(self, url, video_id):
-        if not self._ACCOUNT_INFO:
-            self._get_account_info(url, video_id)
-
-        token = self._download_json(
-            'https://api.nfl.com/identity/v3/token%s' % (
-                '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
-            video_id, headers={'Content-Type': 'application/json'}, note='Downloading access token',
-            data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
-
-        self._TOKEN = token['accessToken']
-        self._TOKEN_EXPIRY = token['expiresIn']
-        self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
-
     def _real_extract(self, url):
         slug = self._match_id(url)
-
-        if not self._API_KEY:
-            webpage = self._download_webpage(url, slug, fatal=False) or ''
-            self._API_KEY = self._search_regex(
-                r'window\.gigyaApiKey=["\'](\w+)["\'];', webpage, 'API key',
-                default='3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f')
-
-        if not self._TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
-            self._get_auth_token(url, slug)
-
+        self._get_auth_token(url, slug)
         video_id = self._download_json(
             f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
                 'Authorization': f'Bearer {self._TOKEN}',
diff --git a/lib/yt_dlp/extractor/niconicochannelplus.py b/lib/yt_dlp/extractor/niconicochannelplus.py
new file mode 100644
index 000000000..89af3f7b5
--- /dev/null
+++ b/lib/yt_dlp/extractor/niconicochannelplus.py
@@ -0,0 +1,426 @@
+import functools
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
+    filter_dict,
+    int_or_none,
+    parse_qs,
+    str_or_none,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class NiconicoChannelPlusBaseIE(InfoExtractor):
+    _WEBPAGE_BASE_URL = 'https://nicochannel.jp'
+
+    def _call_api(self, path, item_id, *args, **kwargs):
+        return self._download_json(
+            f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs)
+
+    def _find_fanclub_site_id(self, channel_name):
+        fanclub_list_json = self._call_api(
+            'content_providers/channels', item_id=f'channels/{channel_name}',
+            note='Fetching channel list', errnote='Unable to fetch channel list',
+        )['data']['content_providers']
+        fanclub_id = traverse_obj(fanclub_list_json, (
+            lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'),
+            get_all=False)
+        if not fanclub_id:
+            raise ExtractorError(f'Channel {channel_name} does not exist', expected=True)
+        return fanclub_id
+
+    def _get_channel_base_info(self, fanclub_site_id):
+        return traverse_obj(self._call_api(
+            f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}',
+            note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False,
+        ), ('data', 'fanclub_site', {dict})) or {}
+
+    def _get_channel_user_info(self, fanclub_site_id):
+        return traverse_obj(self._call_api(
+            f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}',
+            note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False,
+            data=json.dumps('null').encode('ascii'),
+        ), ('data', 'fanclub_site', {dict})) or {}
+
+
+class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
+    IE_NAME = 'NiconicoChannelPlus'
+    IE_DESC = 'ニコニコチャンネルプラス'
+    _VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[\w.-]+)/(?:video|live)/(?P<code>sm\w+)'
+    _TESTS = [{
+        'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H',
+        'info_dict': {
+            'id': 'smsDd8EdFLcVZk9yyAhD6H7H',
+            'title': '前田佳織里はニコ生がしたい!',
+            'ext': 'mp4',
+            'channel': '前田佳織里の世界攻略計画',
+            'channel_id': 'kaorin',
+            'channel_url': 'https://nicochannel.jp/kaorin',
+            'live_status': 'not_live',
+            'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path',
+            'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。',
+            'timestamp': 1641360276,
+            'duration': 4097,
+            'comment_count': int,
+            'view_count': int,
+            'tags': [],
+            'upload_date': '20220105',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # age limited video; test purpose channel.
+        'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve',
+        'info_dict': {
+            'id': 'smDXbcrtyPNxLx9jc4BW69Ve',
+            'title': 'test oshiro',
+            'ext': 'mp4',
+            'channel': '本番チャンネルプラステストマン',
+            'channel_id': 'testman',
+            'channel_url': 'https://nicochannel.jp/testman',
+            'age_limit': 18,
+            'live_status': 'was_live',
+            'timestamp': 1666344616,
+            'duration': 86465,
+            'comment_count': int,
+            'view_count': int,
+            'tags': [],
+            'upload_date': '20221021',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        content_code, channel_id = self._match_valid_url(url).group('code', 'channel')
+        fanclub_site_id = self._find_fanclub_site_id(channel_id)
+
+        data_json = self._call_api(
+            f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'},
+            note='Fetching video page info', errnote='Unable to fetch video page info',
+        )['data']['video_page']
+
+        live_status, session_id = self._get_live_status_and_session_id(content_code, data_json)
+
+        release_timestamp_str = data_json.get('live_scheduled_start_at')
+
+        formats = []
+
+        if live_status == 'is_upcoming':
+            if release_timestamp_str:
+                msg = f'This live event will begin at {release_timestamp_str} UTC'
+            else:
+                msg = 'This event has not started yet'
+            self.raise_no_formats(msg, expected=True, video_id=content_code)
+        else:
+            formats = self._extract_m3u8_formats(
+                # "authenticated_url" is a format string that contains "{session_id}".
+                m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id),
+                video_id=content_code)
+
+        return {
+            'id': content_code,
+            'formats': formats,
+            '_format_sort_fields': ('tbr', 'vcodec', 'acodec'),
+            'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'),
+            'channel_id': channel_id,
+            'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}',
+            'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')),
+            'live_status': live_status,
+            'release_timestamp': unified_timestamp(release_timestamp_str),
+            **traverse_obj(data_json, {
+                'title': ('title', {str}),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+                'description': ('description', {str}),
+                'timestamp': ('released_at', {unified_timestamp}),
+                'duration': ('active_video_filename', 'length', {int_or_none}),
+                'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}),
+                'view_count': ('video_aggregate_info', 'total_views', {int_or_none}),
+                'tags': ('video_tags', ..., 'tag', {str}),
+            }),
+            '__post_extractor': self.extract_comments(
+                content_code=content_code,
+                comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))),
+        }
+
+    def _get_comments(self, content_code, comment_group_id):
+        item_id = f'{content_code}/comments'
+
+        if not comment_group_id:
+            return None
+
+        comment_access_token = self._call_api(
+            f'video_pages/{content_code}/comments_user_token', item_id,
+            note='Getting comment token', errnote='Unable to get comment token',
+        )['data']['access_token']
+
+        comment_list = self._download_json(
+            'https://comm-api.sheeta.com/messages.history', video_id=item_id,
+            note='Fetching comments', errnote='Unable to fetch comments',
+            headers={'Content-Type': 'application/json'},
+            query={
+                'sort_direction': 'asc',
+                'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120,
+            },
+            data=json.dumps({
+                'token': comment_access_token,
+                'group_id': comment_group_id,
+            }).encode('ascii'))
+
+        for comment in traverse_obj(comment_list, ...):
+            yield traverse_obj(comment, {
+                'author': ('nickname', {str}),
+                'author_id': ('sender_id', {str_or_none}),
+                'id': ('id', {str_or_none}),
+                'text': ('message', {str}),
+                'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}),
+                'author_is_uploader': ('sender_id', {lambda x: x == '-1'}),
+            }, get_all=False)
+
+    def _get_live_status_and_session_id(self, content_code, data_json):
+        video_type = data_json.get('type')
+        live_finished_at = data_json.get('live_finished_at')
+
+        payload = {}
+        if video_type == 'vod':
+            if live_finished_at:
+                live_status = 'was_live'
+            else:
+                live_status = 'not_live'
+        elif video_type == 'live':
+            if not data_json.get('live_started_at'):
+                return 'is_upcoming', ''
+
+            if not live_finished_at:
+                live_status = 'is_live'
+            else:
+                live_status = 'was_live'
+                payload = {'broadcast_type': 'dvr'}
+
+                video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg'))
+                video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg'))
+
+                self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.')
+
+                if not (video_allow_dvr_flg and video_convert_to_vod_flg):
+                    raise ExtractorError(
+                        'Live was ended, there is no video for download.', video_id=content_code, expected=True)
+        else:
+            raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False)
+
+        self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}')
+
+        session_id = self._call_api(
+            f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session',
+            data=json.dumps(payload).encode('ascii'), headers={
+                'Content-Type': 'application/json',
+                'fc_use_device': 'null',
+                'origin': 'https://nicochannel.jp',
+            },
+            note='Getting session id', errnote='Unable to get session id',
+        )['data']['session_id']
+
+        return live_status, session_id
+
+
+class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
+    _PAGE_SIZE = 12
+
+    def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page):
+        response = self._call_api(
+            path, item_id, query={
+                **query,
+                'page': (page + 1),
+                'per_page': self._PAGE_SIZE,
+            },
+            headers={'fc_use_device': 'null'},
+            note=f'Getting channel info (page {page + 1})',
+            errnote=f'Unable to get channel info (page {page + 1})')
+
+        for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
+            # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
+            yield self.url_result(
+                f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE)
+
+
+class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
+    IE_NAME = 'NiconicoChannelPlus:channel:videos'
+    IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos'
+    _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
+    _TESTS = [{
+        # query: None
+        'url': 'https://nicochannel.jp/testman/videos',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: None
+        'url': 'https://nicochannel.jp/testtarou/videos',
+        'info_dict': {
+            'id': 'testtarou-videos',
+            'title': 'チャンネルプラステスト太郎-videos',
+        },
+        'playlist_mincount': 2,
+    }, {
+        # query: None
+        'url': 'https://nicochannel.jp/testjirou/videos',
+        'info_dict': {
+            'id': 'testjirou-videos',
+            'title': 'チャンネルプラステスト二郎-videos',
+        },
+        'playlist_mincount': 12,
+    }, {
+        # query: tag
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: vodType
+        'url': 'https://nicochannel.jp/testman/videos?vodType=1',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: sort
+        'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: tag, vodType
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: tag, sort
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: vodType, sort
+        'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: tag, vodType, sort
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at',
+        'info_dict': {
+            'id': 'testman-videos',
+            'title': '本番チャンネルプラステストマン-videos',
+        },
+        'playlist_mincount': 6,
+    }]
+
+    def _real_extract(self, url):
+        """
+        API parameters:
+            sort:
+                -released_at         公開日が新しい順 (newest to oldest)
+                released_at          公開日が古い順 (oldest to newest)
+                -number_of_vod_views 再生数が多い順 (most play count)
+                number_of_vod_views  コメントが多い順 (most comments)
+            vod_type (is "vodType" in "url"):
+                0 すべて (all)
+                1 会員限定 (members only)
+                2 一部無料 (partially free)
+                3 レンタル (rental)
+                4 生放送アーカイブ (live archives)
+                5 アップロード動画 (uploaded videos)
+        """
+
+        channel_id = self._match_id(url)
+        fanclub_site_id = self._find_fanclub_site_id(channel_id)
+        channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
+        qs = parse_qs(url)
+
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(
+                    self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages',
+                    filter_dict({
+                        'tag': traverse_obj(qs, ('tag', 0)),
+                        'sort': traverse_obj(qs, ('sort', 0), default='-released_at'),
+                        'vod_type': traverse_obj(qs, ('vodType', 0), default='0'),
+                    }),
+                    channel_id, f'{channel_id}/videos'),
+                self._PAGE_SIZE),
+            playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos')
+
+
+class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE):
+    IE_NAME = 'NiconicoChannelPlus:channel:lives'
+    IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives'
+    _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives'
+    _TESTS = [{
+        'url': 'https://nicochannel.jp/testman/lives',
+        'info_dict': {
+            'id': 'testman-lives',
+            'title': '本番チャンネルプラステストマン-lives',
+        },
+        'playlist_mincount': 18,
+    }, {
+        'url': 'https://nicochannel.jp/testtarou/lives',
+        'info_dict': {
+            'id': 'testtarou-lives',
+            'title': 'チャンネルプラステスト太郎-lives',
+        },
+        'playlist_mincount': 2,
+    }, {
+        'url': 'https://nicochannel.jp/testjirou/lives',
+        'info_dict': {
+            'id': 'testjirou-lives',
+            'title': 'チャンネルプラステスト二郎-lives',
+        },
+        'playlist_mincount': 6,
+    }]
+
+    def _real_extract(self, url):
+        """
+        API parameters:
+            live_type:
+                1 放送中 (on air)
+                2 放送予定 (scheduled live streams, oldest to newest)
+                3 過去の放送 - すべて (all ended live streams, newest to oldest)
+                4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
+            We use "4" instead of "3" because some recently ended live streams could not be downloaded.
+        """
+
+        channel_id = self._match_id(url)
+        fanclub_site_id = self._find_fanclub_site_id(channel_id)
+        channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
+
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(
+                    self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages',
+                    {
+                        'live_type': 4,
+                    },
+                    channel_id, f'{channel_id}/lives'),
+                self._PAGE_SIZE),
+            playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives')
diff --git a/lib/yt_dlp/extractor/peekvids.py b/lib/yt_dlp/extractor/peekvids.py
index d1fc058b9..41f591b09 100644
--- a/lib/yt_dlp/extractor/peekvids.py
+++ b/lib/yt_dlp/extractor/peekvids.py
@@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE):
             'uploader': 'Brazzers',
             'age_limit': 18,
             'view_count': int,
-            'age_limit': 18,
             'categories': list,
             'tags': list,
         },
diff --git a/lib/yt_dlp/extractor/piaulizaportal.py b/lib/yt_dlp/extractor/piaulizaportal.py
new file mode 100644
index 000000000..1eb6d92b7
--- /dev/null
+++ b/lib/yt_dlp/extractor/piaulizaportal.py
@@ -0,0 +1,70 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_qs,
+    time_seconds,
+    traverse_obj,
+)
+
+
+class PIAULIZAPortalIE(InfoExtractor):
+    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
+    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+    _TESTS = [{
+        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
+        'info_dict': {
+            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
+            'title': 'プレゼンテーションプレイヤーのサンプル',
+            'live_status': 'not_live',
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+    }, {
+        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
+        'info_dict': {
+            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
+            'title': '【確認用】視聴サンプルページ(ULIZA)',
+            'live_status': 'not_live',
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
+        if expires and expires <= time_seconds():
+            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
+
+        webpage = self._download_webpage(url, video_id)
+
+        player_data = self._download_webpage(
+            self._search_regex(
+                r'