diff --git a/lib/yt_dlp/extractor/xhamster.py b/lib/yt_dlp/extractor/xhamster.py index 37224799b..aec1f20bb 100644 --- a/lib/yt_dlp/extractor/xhamster.py +++ b/lib/yt_dlp/extractor/xhamster.py @@ -407,7 +407,7 @@ def _real_extract(self, url): class XHamsterUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS + _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/(?:(?P<user>users)|creators)/(?P<id>[^/?#&]+)' _TESTS = [{ # Paginated user profile 'url': 'https://xhamster.com/users/netvideogirls/videos', @@ -422,6 +422,12 @@ class XHamsterUserIE(InfoExtractor): 'id': 'firatkaan', }, 'playlist_mincount': 1, + }, { + 'url': 'https://xhamster.com/creators/squirt-orgasm-69', + 'info_dict': { + 'id': 'squirt-orgasm-69', + }, + 'playlist_mincount': 150, }, { 'url': 'https://xhday.com/users/mobhunter', 'only_matching': True, @@ -430,8 +436,9 @@ class XHamsterUserIE(InfoExtractor): 'only_matching': True, }] - def _entries(self, user_id): - next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id + def _entries(self, user_id, is_user): + prefix, suffix = ('users', 'videos') if is_user else ('creators', 'exclusive') + next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1' for pagenum in itertools.count(1): page = self._download_webpage( next_page_url, user_id, 'Downloading page %s' % pagenum) @@ -454,5 +461,5 @@ def _entries(self, user_id): break def _real_extract(self, url): - user_id = self._match_id(url) - return self.playlist_result(self._entries(user_id), user_id) + user, user_id = self._match_valid_url(url).group('user', 'id') + return self.playlist_result(self._entries(user_id, bool(user)), user_id) diff --git a/lib/yt_dlp/extractor/youtube.py b/lib/yt_dlp/extractor/youtube.py index a39d17cf1..7e13aa779 100644 --- a/lib/yt_dlp/extractor/youtube.py +++ b/lib/yt_dlp/extractor/youtube.py @@ -941,7 +941,13 @@ def _parse_time_text(self, text): def _extract_response(self, item_id, query, 
note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - for retry in self.RetryManager(): + raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) + # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal. + icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete)) + icd_rm = next(icd_retries) + main_retries = iter(self.RetryManager()) + main_rm = next(main_retries) + for _ in range(main_rm.retries + icd_rm.retries + 1): try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -953,7 +959,8 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if not isinstance(e.cause, network_exceptions): return self._error_or_warning(e, fatal=fatal) elif not isinstance(e.cause, HTTPError): - retry.error = e + main_rm.error = e + next(main_retries) continue first_bytes = e.cause.response.read(512) @@ -965,27 +972,32 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # Sometimes a 404 is also received. 
See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 if e.cause.status not in (403, 429): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) try: self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response + # YouTube's servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 if 'unknown error' in e.msg.lower(): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if not traverse_obj(response, *variadic(check_get_keys)): - retry.error = ExtractorError('Incomplete data received', expected=True) + icd_rm.error = ExtractorError('Incomplete data received', expected=True) + should_retry = next(icd_retries, None) + if not should_retry: + return None continue return response diff --git a/lib/yt_dlp_version b/lib/yt_dlp_version index 0ea97a3f1..3fd16ae00 100644 --- a/lib/yt_dlp_version +++ b/lib/yt_dlp_version @@ -1 +1 @@ -c54ddfba0f7d68034339426223d75373c5fc86df \ No newline at end of file +cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3 \ No newline at end of file