Compare commits

...

6 Commits

Author SHA1 Message Date
Oliver Pfeiffer
91209fcb63
Merge 8934a232fb1e2491932fb627df3b4329f8318317 into 673277e510ebd996b62a2fcc76169bf3cce29910 2025-03-06 02:47:13 +00:00
dirkf
673277e510
[YouTube] Fix 91b1569 2025-02-28 01:02:20 +00:00
dirkf
91b1569f68
[YouTube] Fix channel playlist extraction (#33074)
* [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_...
* resolves #33073
* thx seproDev (yt-dlp/yt-dlp#11615)

Co-authored-by: sepro <sepro@sepr0.com>
2025-02-28 00:02:10 +00:00
Oliver Pfeiffer
8934a232fb [douyin] Incorporated dirkf's remarks part 2. Adjusted test data. 2022-04-20 23:05:08 +02:00
Oliver Pfeiffer
0ec08cef90 [douyin] Incorporated dirkf's remarks 2021-12-17 21:54:41 +01:00
Oliver Pfeiffer
d3725a6ab2 [douyin] Add new extractor 2021-12-01 21:50:40 +01:00
3 changed files with 148 additions and 2 deletions

View File

@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
try_get,
orderedSet
)
class DouyinVideoIE(InfoExtractor):
    """Extractor for a single Douyin video page (douyin.com/video/<id>).

    The metadata is read from the JSON returned by the
    ``/web/api/v2/aweme/iteminfo`` endpoint, queried with the numeric
    video id taken from the URL.
    """
    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.douyin.com/video/6989098563519270181',
        'md5': '99c5667992b8a8d46c145907f677c92b',
        'info_dict': {
            'id': '6989098563519270181',
            'url': 'https://aweme.snssdk.com/aweme/v1/playwm/?video_id=v0300fg10000c3v47dbc77u9fvb20tbg&ratio=720p&line=0',
            'title': '杨集#我的家乡 ',
            'uploader': '🌹永恒的爱🌹',
            'uploader_id': '104081949894',
            # UNIX timestamp in seconds, as required for the info dict
            'timestamp': 1627276320,
            'ext': 'mp4',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by `url`.

        Raises ExtractorError when the API reports a non-zero (or missing)
        status code, or when it returns no item for the requested id.
        """
        video_id = self._match_id(url)
        iteminfo = self._download_json(
            'https://www.douyin.com/web/api/v2/aweme/iteminfo',
            video_id, query={'item_ids': video_id}) or {}

        # Only a falsy status_code (e.g. 0) lets extraction continue; a
        # missing key is replaced by a truthy placeholder so it is
        # reported as an error, too
        status_code = iteminfo.get('status_code', 'status_code missing')
        if status_code:
            raise ExtractorError(
                '%s (%s)' % (iteminfo.get('status_msg', 'status_msg missing'), status_code),
                video_id=video_id)

        item_list = iteminfo.get('item_list')
        if not item_list:
            raise ExtractorError(
                'The video you want to download does not exist any more',
                video_id=video_id, expected=True)
        item = item_list[0]

        return {
            'id': video_id,
            # title and url are mandatory: plain indexing makes a changed
            # response layout fail loudly instead of yielding a broken result
            'title': item['desc'],
            'url': item['video']['play_addr']['url_list'][0],
            'uploader': try_get(item, lambda x: x['author']['nickname'], compat_str),
            'uploader_id': try_get(item, lambda x: x['author']['uid'], compat_str),
            # the duration value is divided by 1000 (presumably ms -> s)
            'duration': int_or_none(
                item.get('duration') or try_get(item, lambda x: x['video']['duration'], int),
                scale=1000),
            # create_time is already expressed in seconds (see _TEST), so it
            # must not be scaled: the info dict expects a UNIX timestamp
            'timestamp': int_or_none(item.get('create_time')),
            'width': try_get(item, lambda x: x['video']['width'], int),
            'height': try_get(item, lambda x: x['video']['height'], int),
            'vbr': try_get(item, lambda x: x['video']['bit_rate'], int),
            'ext': 'mp4',
        }
class DouyinUserIE(InfoExtractor):
    """Playlist extractor for the videos of a Douyin user (douyin.com/user/<sec_uid>).

    The video ids are collected page by page from the
    ``/web/api/v2/aweme/post`` endpoint; each one becomes a url_result
    that is handled by DouyinVideoIE.
    """
    _VALID_URL = r'https?://(?:www\.)?douyin\.com/user/(?P<id>[a-zA-Z0-9_\-]+)'
    _TEST = {
        'url': 'https://www.douyin.com/user/MS4wLjABAAAAP5Q7Z-SwleIzAACYIu-LrwGbEZzN2dc5PT3hGNSTkSM',
        'info_dict': {
            'id': 'MS4wLjABAAAAP5Q7Z-SwleIzAACYIu-LrwGbEZzN2dc5PT3hGNSTkSM',
        },
        'playlist_mincount': 31,
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per video id of the user.

        Raises ExtractorError when a page reports a non-zero (or missing)
        status code, or when it carries no 'aweme_list' key.
        """
        sec_uid = self._match_id(url)
        max_cursor = ''
        video_ids = []
        while True:
            post = self._download_json(
                'https://www.douyin.com/web/api/v2/aweme/post', sec_uid,
                query={'sec_uid': sec_uid, 'max_cursor': max_cursor, 'count': 50}) or {}

            # Only a falsy status_code (e.g. 0) lets extraction continue
            status_code = post.get('status_code', 'status_code missing')
            if status_code:
                raise ExtractorError(
                    '%s (%s)' % (post.get('status_msg', 'status_msg missing'), status_code),
                    video_id=sec_uid)

            aweme_list = post.get('aweme_list')
            if aweme_list is None:
                raise ExtractorError(
                    'JSON response does not contain aweme_list', video_id=sec_uid)

            # skip malformed items and items without an id
            video_ids.extend(filter(None, (
                aweme.get('aweme_id') for aweme in aweme_list
                if isinstance(aweme, dict))))

            next_cursor = post.get('max_cursor')
            # Stop when the API signals the last page, and also when the
            # cursor is missing or did not advance: repeating the very same
            # request would otherwise loop forever
            if not post.get('has_more') or next_cursor is None or next_cursor == max_cursor:
                break
            max_cursor = next_cursor

        # Duplicates are dropped on the plain ids (first occurrence wins)
        # before the result dicts are built
        return self.playlist_result([
            self.url_result(
                'https://www.douyin.com/video/%s' % aweme_id,
                ie=DouyinVideoIE.ie_key(), video_id=aweme_id)
            for aweme_id in orderedSet(video_ids)], sec_uid)

View File

@ -305,6 +305,10 @@ from .dfb import DFBIE
from .dhm import DHMIE from .dhm import DHMIE
from .digg import DiggIE from .digg import DiggIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .douyin import (
DouyinVideoIE,
DouyinUserIE
)
from .douyutv import ( from .douyutv import (
DouyuShowIE, DouyuShowIE,
DouyuTVIE, DouyuTVIE,

View File

@ -27,6 +27,7 @@ from ..compat import (
) )
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
bug_reports_message,
clean_html, clean_html,
dict_get, dict_get,
error_to_compat_str, error_to_compat_str,
@ -65,6 +66,7 @@ from ..utils import (
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
variadic,
) )
@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
} }
@staticmethod
def _extract_thumbnails(data, *path_list, **kw_final_key):
    """
    Collect thumbnail dicts ('url', 'height', 'width') from `data`
    @param path_list: zero or more paths, each leading to the level of
                      `data` that contains the thumbnail list key; with
                      no path given, `data` itself is that level
    @param final_key: (keyword only) name of the thumbnail list key,
                      'thumbnails' by default
    """
    list_key = kw_final_key.get('final_key', 'thumbnails')

    def fix_maxres_url(u):
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if u and 'maxresdefault' in u:
            return update_url(u, query=None)
        return u

    def having_url(thumbnail):
        # drop candidates for which no usable URL was extracted
        return thumbnail if thumbnail.get('url') else None

    # one alternative per requested path, each one continuing through
    # the list key into every element found below it
    branches = tuple(
        variadic(path) + (list_key, Ellipsis)
        for path in path_list or [()])
    thumbnail_fields = {
        'url': ('url', T(url_or_none), T(fix_maxres_url)),
        'height': ('height', T(int_or_none)),
        'width': ('width', T(int_or_none)),
    }
    return traverse_obj(data, (branches, thumbnail_fields, T(having_url)))
def _search_results(self, query, params): def _search_results(self, query, params):
data = { data = {
'context': { 'context': {
@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
expected_type=txt_or_none) expected_type=txt_or_none)
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
if not isinstance(item, dict): lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
if lockup_view_model:
entry = self._extract_lockup_view_model(lockup_view_model)
if entry:
yield entry
continue continue
renderer = self._extract_grid_item_renderer(item) renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
yield self._extract_video(renderer) yield self._extract_video(renderer)
def _extract_lockup_view_model(self, view_model):
    """
    Turn a lockup view model dict into a playlist url_result

    Returns None when the model has no 'contentId', or (after a
    one-time warning) when its 'contentType' is neither a playlist
    nor a podcast.
    """
    content_id = view_model.get('contentId')
    if not content_id:
        return

    content_type = view_model.get('contentType')
    if content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
        playlist_url = update_url_query(
            'https://www.youtube.com/playlist', {'list': content_id})
        entry = self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=content_id)
        # additional metadata taken from the view model itself
        title = traverse_obj(view_model, (
            'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str)))
        thumbnails = self._extract_thumbnails(
            view_model,
            ('contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
             'thumbnailViewModel', 'image'),
            final_key='sources')
        return merge_dicts(entry, {
            'title': title,
            'thumbnails': thumbnails,
        })

    # any other content type is reported and skipped
    self.report_warning(
        'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
    return
def _video_entry(self, video_renderer): def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId') video_id = video_renderer.get('videoId')
if video_id: if video_id: