-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CI] auto update yt_dlp to upstream commit b532556d0a85e7d76f8f0880861232fb706ddbc5
- Loading branch information
1 parent d6d6ae2 commit 908bcee
Showing 3 changed files with 136 additions and 78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,97 +1,155 @@ | ||
import re | ||
import json | ||
from datetime import date | ||
from urllib.parse import unquote | ||
|
||
from .common import InfoExtractor | ||
from ..utils import merge_dicts | ||
from ..compat import functools | ||
from ..utils import ExtractorError, make_archive_id, urljoin | ||
from ..utils.traversal import traverse_obj | ||
|
||
|
||
class Pr0grammStaticIE(InfoExtractor): | ||
# Possible urls: | ||
# https://pr0gramm.com/static/5466437 | ||
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)' | ||
_TEST = { | ||
'url': 'https://pr0gramm.com/static/5466437', | ||
'md5': '52fa540d70d3edc286846f8ca85938aa', | ||
class Pr0grammIE(InfoExtractor): | ||
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)' | ||
_TESTS = [{ | ||
# Tags require account | ||
'url': 'https://pr0gramm.com/new/video/5466437', | ||
'info_dict': { | ||
'id': '5466437', | ||
'ext': 'mp4', | ||
'title': 'pr0gramm-5466437 by g11st', | ||
'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'], | ||
'uploader': 'g11st', | ||
'uploader_id': 394718, | ||
'upload_timestamp': 1671590240, | ||
'upload_date': '20221221', | ||
} | ||
} | ||
|
||
def _real_extract(self, url): | ||
video_id = self._match_id(url) | ||
webpage = self._download_webpage(url, video_id) | ||
|
||
# Fetch media sources | ||
entries = self._parse_html5_media_entries(url, webpage, video_id) | ||
media_info = entries[0] | ||
|
||
# Fetch author | ||
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader') | ||
'like_count': int, | ||
'dislike_count': int, | ||
'age_limit': 0, | ||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', | ||
}, | ||
}, { | ||
# Tags require account | ||
'url': 'https://pr0gramm.com/new/3052805:comment28391322', | ||
'info_dict': { | ||
'id': '3052805', | ||
'ext': 'mp4', | ||
'title': 'pr0gramm-3052805 by Hansking1', | ||
'tags': 'count:15', | ||
'uploader': 'Hansking1', | ||
'uploader_id': 385563, | ||
'upload_timestamp': 1552930408, | ||
'upload_date': '20190318', | ||
'like_count': int, | ||
'dislike_count': int, | ||
'age_limit': 0, | ||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', | ||
}, | ||
}, { | ||
# Requires verified account | ||
'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332', | ||
'info_dict': { | ||
'id': '5848332', | ||
'ext': 'mp4', | ||
'title': 'pr0gramm-5848332 by erd0pfel', | ||
'tags': 'count:18', | ||
'uploader': 'erd0pfel', | ||
'uploader_id': 349094, | ||
'upload_timestamp': 1694489652, | ||
'upload_date': '20230912', | ||
'like_count': int, | ||
'dislike_count': int, | ||
'age_limit': 18, | ||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', | ||
}, | ||
}, { | ||
'url': 'https://pr0gramm.com/static/5466437', | ||
'only_matching': True, | ||
}, { | ||
'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805', | ||
'only_matching': True, | ||
}, { | ||
'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290', | ||
'only_matching': True, | ||
}] | ||
|
||
# Fetch approx upload timestamp from filename | ||
# Have None-defaults in case the extraction fails | ||
uploadDay = None | ||
uploadMon = None | ||
uploadYear = None | ||
uploadTimestr = None | ||
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4) | ||
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage) | ||
BASE_URL = 'https://pr0gramm.com' | ||
|
||
if (m): | ||
# Up to a day of accuracy should suffice... | ||
uploadDay = m.groupdict().get('day') | ||
uploadMon = m.groupdict().get('mon') | ||
uploadYear = m.groupdict().get('year') | ||
uploadTimestr = uploadYear + uploadMon + uploadDay | ||
@functools.cached_property | ||
def _is_logged_in(self): | ||
return 'pp' in self._get_cookies(self.BASE_URL) | ||
|
||
return merge_dicts({ | ||
'id': video_id, | ||
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''), | ||
'uploader': uploader, | ||
'upload_date': uploadTimestr | ||
}, media_info) | ||
@functools.cached_property | ||
def _maximum_flags(self): | ||
# We need to guess the flags for the content otherwise the api will raise an error | ||
# We can guess the maximum allowed flags for the account from the cookies | ||
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw | ||
flags = 0b0001 | ||
if self._is_logged_in: | ||
flags |= 0b1000 | ||
cookies = self._get_cookies(self.BASE_URL) | ||
if 'me' not in cookies: | ||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information') | ||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')): | ||
flags |= 0b0110 | ||
|
||
return flags | ||
|
||
# This extractor is for the primary url (used for sharing, and appears in the | ||
# location bar) Since this page loads the DOM via JS, yt-dl can't find any | ||
# video information here. So let's redirect to a compatibility version of | ||
# the site, which does contain the <video>-element by itself, without requiring | ||
# js to be ran. | ||
class Pr0grammIE(InfoExtractor): | ||
# Possible urls: | ||
# https://pr0gramm.com/new/546637 | ||
# https://pr0gramm.com/new/video/546637 | ||
# https://pr0gramm.com/top/546637 | ||
# https://pr0gramm.com/top/video/546637 | ||
# https://pr0gramm.com/user/g11st/uploads/5466437 | ||
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290 | ||
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030 | ||
# https://pr0gramm.com/user/froschler/1elf/5232030 | ||
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id! | ||
# https://pr0gramm.com/top/fruher war alles damals/5498175 | ||
def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'): | ||
data = self._download_json( | ||
f'https://pr0gramm.com/api/items/{endpoint}', | ||
video_id, note, query=query, expected_status=403) | ||
|
||
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)' | ||
_TEST = { | ||
'url': 'https://pr0gramm.com/new/video/5466437', | ||
'info_dict': { | ||
'id': '5466437', | ||
'ext': 'mp4', | ||
'title': 'pr0gramm-5466437 by g11st', | ||
'uploader': 'g11st', | ||
'upload_date': '20221221', | ||
} | ||
} | ||
error = traverse_obj(data, ('error', {str})) | ||
if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'): | ||
if not self._is_logged_in: | ||
self.raise_login_required() | ||
raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True) | ||
elif error: | ||
message = traverse_obj(data, ('msg', {str})) or error | ||
raise ExtractorError(f'API returned error: {message}', expected=True) | ||
|
||
def _generic_title(): | ||
return "oof" | ||
return data | ||
|
||
def _real_extract(self, url): | ||
video_id = self._match_id(url) | ||
video_info = traverse_obj( | ||
self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}), | ||
('items', 0, {dict})) | ||
|
||
source = urljoin('https://img.pr0gramm.com', video_info.get('image')) | ||
if not source or not source.endswith('mp4'): | ||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) | ||
|
||
return self.url_result( | ||
'https://pr0gramm.com/static/' + video_id, | ||
video_id=video_id, | ||
ie=Pr0grammStaticIE.ie_key()) | ||
tags = None | ||
if self._is_logged_in: | ||
metadata = self._call_api('info', video_id, {'itemId': video_id}) | ||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) | ||
# Sorted by "confidence", higher confidence = earlier in list | ||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) | ||
if confidences: | ||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] | ||
|
||
return { | ||
'id': video_id, | ||
'title': f'pr0gramm-{video_id} by {video_info.get("user")}', | ||
'formats': [{ | ||
'url': source, | ||
'ext': 'mp4', | ||
**traverse_obj(video_info, { | ||
'width': ('width', {int}), | ||
'height': ('height', {int}), | ||
}), | ||
}], | ||
'tags': tags, | ||
'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0, | ||
'_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)], | ||
**traverse_obj(video_info, { | ||
'uploader': ('user', {str}), | ||
'uploader_id': ('userId', {int}), | ||
'like_count': ('up', {int}), | ||
'dislike_count': ('down', {int}), | ||
'upload_timestamp': ('created', {int}), | ||
'upload_date': ('created', {int}, {date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), | ||
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}) | ||
}), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
cf11b40ac40e3d23a6352753296f3a732886efb9 | ||
b532556d0a85e7d76f8f0880861232fb706ddbc5 |