[YouTube] Support @owner format in uploader_id etc

* implement https://github.com/ytdl-org/youtube-dl/issues/31530#issuecomment-1435734719
* update affected tests
* misc clean-ups
This commit is contained in:
dirkf 2023-02-24 02:48:37 +00:00
parent e67e52a8f8
commit f7ce98a21e
1 changed files with 194 additions and 125 deletions

View File

@ -31,6 +31,7 @@ from ..utils import (
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts,
mimetype2ext, mimetype2ext,
parse_codecs, parse_codecs,
parse_duration, parse_duration,
@ -400,6 +401,62 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
break break
data['continuation'] = token data['continuation'] = token
@staticmethod
def _owner_endpoints_path():
return [
Ellipsis,
lambda k, _: k.endswith('SecondaryInfoRenderer'),
('owner', 'videoOwner'), 'videoOwnerRenderer', 'title',
'runs', Ellipsis]
def _extract_channel_id(self, webpage, videodetails=None, metadata=None, renderers=None):
    """Return the channel ID from the first source that provides one.

    When at least one of the structured sources is non-empty they are
    queried in this order:

    * `videodetails`: the `channelId` key
    * `metadata`: the `externalChannelId` / `externalId` keys
    * `renderers`: the owner's navigationEndpoint/browseEndpoint/browseId

    If none of those gives a value, the `channelId` meta tag of `webpage`
    is searched, with `default=None` passed as the fallback.

    `videodetails`, `metadata` and `renderers` are optional; omitting one
    (or passing None) is equivalent to passing an empty container.
    """
    # None sentinels replace the former shared mutable defaults ({} / []);
    # normalise them so the lookups below see the same empty containers
    if videodetails is None:
        videodetails = {}
    if metadata is None:
        metadata = {}
    if renderers is None:
        renderers = []

    channel_id = None
    if any((videodetails, metadata, renderers)):
        channel_id = (
            traverse_obj(videodetails, 'channelId')
            or traverse_obj(metadata, 'externalChannelId', 'externalId')
            or traverse_obj(renderers,
                            self._owner_endpoints_path() + [
                                'navigationEndpoint', 'browseEndpoint', 'browseId'],
                            get_all=False)
        )
    return channel_id or self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
def _extract_author_var(self, webpage, var_name,
                        videodetails=None, metadata=None, renderers=None):
    """Return an attribute of the video/playlist author.

    `var_name` selects the attribute and must be a key of `paths` below
    ('name' or 'url'); any other value raises KeyError.

    When at least one of the structured sources is non-empty they are
    queried in order (`videodetails`, `metadata`, `renderers`).  If that
    gives nothing, the `<link itemprop=var_name ...>` tag inside the
    `itemprop="author"` element of `webpage` is parsed instead.

    `videodetails`, `metadata` and `renderers` are optional; omitting one
    (or passing None) is equivalent to passing an empty container.
    """
    # None sentinels replace the former shared mutable defaults ({} / []);
    # normalise them so the lookups below see the same empty containers
    if videodetails is None:
        videodetails = {}
    if metadata is None:
        metadata = {}
    if renderers is None:
        renderers = []

    result = None
    # each tuple is indexed by source:
    #   [0] attribute of the HTML <link> tag, [1] path in videodetails,
    #   [2] path in metadata, [3] path suffix below each owner run
    paths = {
        #       (HTML, videodetails, metadata, renderers)
        'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
        'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
                ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
    }
    if any((videodetails, metadata, renderers)):
        result = (
            traverse_obj(videodetails, paths[var_name][1], get_all=False)
            or traverse_obj(metadata, paths[var_name][2], get_all=False)
            or traverse_obj(renderers,
                            self._owner_endpoints_path() + paths[var_name][3],
                            get_all=False)
        )
    # HTML fallback: search only within the author element; the `or ''`
    # and `default=''` keep the chain working when nothing matches
    return result or traverse_obj(
        extract_attributes(self._search_regex(
            r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
            % re.escape(var_name),
            get_element_by_attribute('itemprop', 'author', webpage) or '',
            'author link', default='')),
        paths[var_name][0])
@staticmethod
def _yt_urljoin(url_or_path):
    """Join `url_or_path` onto the YouTube site root with urljoin()."""
    site_root = 'https://www.youtube.com'
    return urljoin(site_root, url_or_path)
def _extract_uploader_id(self, uploader_url):
    """Extract the uploader ID from an uploader/channel page URL.

    The captured value is the path component after `/channel/` or
    `/user/`, or a component beginning with `@` (the `@` is kept, since
    it is only matched by a lookahead).  A falsy `uploader_url` is
    searched as the empty string; `default=None` is passed as the
    fallback for a failed search.
    """
    id_pattern = r'/(?:(?:channel|user)/|(?=@))([^/?&#]+)'
    searched_url = uploader_url or ''
    return self._search_regex(
        id_pattern, searched_url, 'uploader id', default=None)
class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com' IE_DESC = 'YouTube.com'
@ -516,8 +573,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister', 'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag', 'uploader_id': '@PhilippHagemeister',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
'channel': 'Philipp Hagemeister', 'channel': 'Philipp Hagemeister',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
@ -557,8 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister', 'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag', 'uploader_id': '@PhilippHagemeister',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
'upload_date': '20121002', 'upload_date': '20121002',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'], 'categories': ['Science & Technology'],
@ -600,7 +657,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
'duration': 244, 'duration': 244,
'uploader': 'AfrojackVEVO', 'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO', 'uploader_id': '@AfrojackVEVO',
'upload_date': '20131011', 'upload_date': '20131011',
'abr': 129.495, 'abr': 129.495,
}, },
@ -618,8 +675,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 219, 'duration': 219,
'upload_date': '20100909', 'upload_date': '20100909',
'uploader': 'Amazing Atheist', 'uploader': 'Amazing Atheist',
'uploader_id': 'TheAmazingAtheist', 'uploader_id': '@theamazingatheist',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
'title': 'Burning Everyone\'s Koran', 'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
} }
@ -635,8 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'duration': 142, 'duration': 142,
'uploader': 'The Witcher', 'uploader': 'The Witcher',
'uploader_id': 'WitcherGame', 'uploader_id': '@thewitcher',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@thewitcher',
'upload_date': '20140605', 'upload_date': '20140605',
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg', 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
'age_limit': 18, 'age_limit': 18,
@ -659,7 +716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:bf77e03fcae5529475e500129b05668a', 'description': 'md5:bf77e03fcae5529475e500129b05668a',
'duration': 177, 'duration': 177,
'uploader': 'FlyingKitty', 'uploader': 'FlyingKitty',
'uploader_id': 'FlyingKitty900', 'uploader_id': '@FlyingKitty900',
'upload_date': '20200408', 'upload_date': '20200408',
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg', 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
'age_limit': 18, 'age_limit': 18,
@ -682,7 +739,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:17eccca93a786d51bc67646756894066', 'description': 'md5:17eccca93a786d51bc67646756894066',
'duration': 106, 'duration': 106,
'uploader': 'Projekt Melody', 'uploader': 'Projekt Melody',
'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 'uploader_id': '@ProjektMelody',
'upload_date': '20191227', 'upload_date': '20191227',
'age_limit': 18, 'age_limit': 18,
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg', 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
@ -704,10 +761,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'OOMPH! - Such Mich Find Mich (Lyrics)', 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
'description': 'Fan Video. Music & Lyrics by OOMPH!.', 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
'duration': 210, 'duration': 210,
'uploader': 'Herr Lurik',
'uploader_id': 'st3in234',
'upload_date': '20130730', 'upload_date': '20130730',
'uploader_url': 'http://www.youtube.com/user/st3in234', 'uploader': 'Herr Lurik',
'uploader_id': '@HerrLurik',
'uploader_url': 'http://www.youtube.com/@HerrLurik',
'age_limit': 0, 'age_limit': 0,
'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg', 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'], 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
@ -740,8 +797,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'duration': 266, 'duration': 266,
'upload_date': '20100430', 'upload_date': '20100430',
'uploader_id': 'deadmau5', 'uploader_id': '@deadmau5',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@deadmau5',
'creator': 'deadmau5', 'creator': 'deadmau5',
'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336', 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
'uploader': 'deadmau5', 'uploader': 'deadmau5',
@ -762,8 +819,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*', 'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
'duration': 6085, 'duration': 6085,
'upload_date': '20150827', 'upload_date': '20150827',
'uploader_id': 'olympic', 'uploader_id': '@Olympics',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Olympics',
'uploader': r're:Olympics?', 'uploader': r're:Olympics?',
'age_limit': 0, 'age_limit': 0,
'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg', 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
@ -785,8 +842,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'stretched_ratio': 16 / 9., 'stretched_ratio': 16 / 9.,
'duration': 85, 'duration': 85,
'upload_date': '20110310', 'upload_date': '20110310',
'uploader_id': 'AllenMeow', 'uploader_id': '@AllenMeow',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫ᄋᄅ', 'uploader': '孫ᄋᄅ',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
@ -905,6 +962,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Not multifeed any more',
}, },
{ {
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@ -938,8 +996,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'duration': 133, 'duration': 133,
'upload_date': '20151119', 'upload_date': '20151119',
'uploader_id': 'IronSoulElf', 'uploader_id': '@IronSoulElf',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@IronSoulElf',
'uploader': 'IronSoulElf', 'uploader': 'IronSoulElf',
'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan', 'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
'track': 'Dark Walk', 'track': 'Dark Walk',
@ -987,8 +1045,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:a677553cf0840649b731a3024aeff4cc', 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
'duration': 721, 'duration': 721,
'upload_date': '20150127', 'upload_date': '20150127',
'uploader_id': 'BerkmanCenter', 'uploader_id': '@BKCHarvard',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BKCHarvard',
'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader': 'The Berkman Klein Center for Internet & Society',
'license': 'Creative Commons Attribution license (reuse allowed)', 'license': 'Creative Commons Attribution license (reuse allowed)',
}, },
@ -1007,8 +1065,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 4060, 'duration': 4060,
'upload_date': '20151119', 'upload_date': '20151119',
'uploader': 'Bernie Sanders', 'uploader': 'Bernie Sanders',
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', 'uploader_id': '@BernieSanders',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BernieSanders',
'license': 'Creative Commons Attribution license (reuse allowed)', 'license': 'Creative Commons Attribution license (reuse allowed)',
}, },
'params': { 'params': {
@ -1054,8 +1112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 2085, 'duration': 2085,
'upload_date': '20170118', 'upload_date': '20170118',
'uploader': 'Vsauce', 'uploader': 'Vsauce',
'uploader_id': 'Vsauce', 'uploader_id': '@Vsauce',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Vsauce',
'series': 'Mind Field', 'series': 'Mind Field',
'season_number': 1, 'season_number': 1,
'episode_number': 1, 'episode_number': 1,
@ -1191,8 +1249,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'IMG 3456', 'title': 'IMG 3456',
'description': '', 'description': '',
'upload_date': '20170613', 'upload_date': '20170613',
'uploader_id': 'ElevageOrVert',
'uploader': 'ElevageOrVert', 'uploader': 'ElevageOrVert',
'uploader_id': '@ElevageOrVert',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1210,8 +1268,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Part 77 Sort a list of simple types in c#', 'title': 'Part 77 Sort a list of simple types in c#',
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
'upload_date': '20130831', 'upload_date': '20130831',
'uploader_id': 'kudvenkat',
'uploader': 'kudvenkat', 'uploader': 'kudvenkat',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1263,8 +1321,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:ea770e474b7cd6722b4c95b833c03630', 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
'upload_date': '20201120', 'upload_date': '20201120',
'uploader': 'Walk around Japan', 'uploader': 'Walk around Japan',
'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw', 'uploader_id': '@walkaroundjapan7124',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@walkaroundjapan7124',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1276,11 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': { 'info_dict': {
'id': '4L2J27mJ3Dc', 'id': '4L2J27mJ3Dc',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Midwest Squid Game #Shorts',
'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
'upload_date': '20211025', 'upload_date': '20211025',
'uploader': 'Charlie Berens', 'uploader': 'Charlie Berens',
'description': 'md5:976512b8a29269b93bbd8a61edc45a6d', 'uploader_id': '@CharlieBerens',
'uploader_id': 'fivedlrmilkshake',
'title': 'Midwest Squid Game #Shorts',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -2088,25 +2146,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
thumbnails = [{'url': thumbnail}] thumbnails = [{'url': thumbnail}]
category = microformat.get('category') or search_meta('genre') category = microformat.get('category') or search_meta('genre')
channel_id = video_details.get('channelId') \ channel_id = self._extract_channel_id(
or microformat.get('externalChannelId') \ webpage, videodetails=video_details, metadata=microformat)
or search_meta('channelId')
duration = int_or_none( duration = int_or_none(
video_details.get('lengthSeconds') video_details.get('lengthSeconds')
or microformat.get('lengthSeconds')) \ or microformat.get('lengthSeconds')) \
or parse_duration(search_meta('duration')) or parse_duration(search_meta('duration'))
is_live = video_details.get('isLive') is_live = video_details.get('isLive')
def gen_owner_profile_url(): owner_profile_url = self._yt_urljoin(self._extract_author_var(
yield microformat.get('ownerProfileUrl') webpage, 'url', videodetails=video_details, metadata=microformat))
yield extract_attributes(self._search_regex(
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
get_element_by_attribute('itemprop', 'author', webpage),
'owner_profile_url', default='')).get('href')
owner_profile_url = next( uploader = self._extract_author_var(
(x for x in map(url_or_none, gen_owner_profile_url()) if x), webpage, 'name', videodetails=video_details, metadata=microformat)
None)
if not player_url: if not player_url:
player_url = self._extract_player_url(webpage) player_url = self._extract_player_url(webpage)
@ -2121,13 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': unified_strdate( 'upload_date': unified_strdate(
microformat.get('uploadDate') microformat.get('uploadDate')
or search_meta('uploadDate')), or search_meta('uploadDate')),
'uploader': video_details['author'], 'uploader': uploader,
'uploader_id': self._search_regex(
r'/(?:channel|user)/([^/?&#]+)', owner_profile_url,
'uploader id', fatal=False) if owner_profile_url else None,
'uploader_url': owner_profile_url,
'channel_id': channel_id, 'channel_id': channel_id,
'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
'duration': duration, 'duration': duration,
'view_count': int_or_none( 'view_count': int_or_none(
video_details.get('viewCount') video_details.get('viewCount')
@ -2257,6 +2304,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data, initial_data,
lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
list) or [] list) or []
if not info['channel_id']:
channel_id = self._extract_channel_id('', renderers=contents)
if not info['uploader']:
info['uploader'] = self._extract_author_var('', 'name', renderers=contents)
if not owner_profile_url:
owner_profile_url = self._yt_urljoin(self._extract_author_var('', 'url', renderers=contents))
for content in contents: for content in contents:
vpir = content.get('videoPrimaryInfoRenderer') vpir = content.get('videoPrimaryInfoRenderer')
if vpir: if vpir:
@ -2304,10 +2358,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}) })
vsir = content.get('videoSecondaryInfoRenderer') vsir = content.get('videoSecondaryInfoRenderer')
if vsir: if vsir:
info['channel'] = get_text(try_get(
vsir,
lambda x: x['owner']['videoOwnerRenderer']['title'],
dict))
rows = try_get( rows = try_get(
vsir, vsir,
lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
@ -2365,7 +2415,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.mark_watched(video_id, player_response) self.mark_watched(video_id, player_response)
return info return merge_dicts(
info, {
'uploader_id': self._extract_uploader_id(owner_profile_url),
'uploader_url': owner_profile_url,
'channel_id': channel_id,
'channel_url': channel_id and self._yt_urljoin('/channel/' + channel_id),
'channel': info['uploader'],
})
class YoutubeTabIE(YoutubeBaseInfoExtractor): class YoutubeTabIE(YoutubeBaseInfoExtractor):
@ -2394,6 +2451,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'description': 'Short clips from Super Cooper Sundays!', 'description': 'Short clips from Super Cooper Sundays!',
'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ', 'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
'title': 'Super Cooper Shorts - Shorts', 'title': 'Super Cooper Shorts - Shorts',
'uploader': 'Super Cooper Shorts',
'uploader_id': '@SuperCooperShorts',
} }
}, { }, {
# Channel that does not have a Shorts tab. Test should just download videos on Home tab instead # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
@ -2404,14 +2463,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': 'Emergency Awesome - Home', 'title': 'Emergency Awesome - Home',
}, },
'playlist_mincount': 5, 'playlist_mincount': 5,
'skip': 'new test page needed to replace `Emergency Awesome - Shorts`',
}, { }, {
# playlists, multipage # playlists, multipage
'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
'uploader': 'Igor Kleiner',
'uploader_id': '@IgorDataScience',
}, },
}, { }, {
# playlists, multipage, different order # playlists, multipage, different order
@ -2419,8 +2481,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Игорь Клейнер - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
'uploader': 'Igor Kleiner',
'uploader_id': '@IgorDataScience',
}, },
}, { }, {
# playlists, series # playlists, series
@ -2430,6 +2494,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Playlists', 'title': '3Blue1Brown - Playlists',
'description': 'md5:e1384e8a133307dd10edee76e875d62f', 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
'uploader': '3Blue1Brown',
'uploader_id': '@3blue1brown',
}, },
}, { }, {
# playlists, singlepage # playlists, singlepage
@ -2439,6 +2505,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
'title': 'ThirstForScience - Playlists', 'title': 'ThirstForScience - Playlists',
'description': 'md5:609399d937ea957b0f53cbffb747a14c', 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
'uploader': 'ThirstForScience',
'uploader_id': '@ThirstForScience',
} }
}, { }, {
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
@ -2447,20 +2515,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# basic, single video playlist # basic, single video playlist
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': { 'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'title': 'youtube-dl public playlist', 'title': 'youtube-dl public playlist',
'uploader': 'Sergey M.',
'uploader_id': '@sergeym.6173',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
# empty playlist # empty playlist
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': { 'info_dict': {
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader': 'Sergey M.',
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'title': 'youtube-dl empty playlist', 'title': 'youtube-dl empty playlist',
'uploader': 'Sergey M.',
'uploader_id': '@sergeym.6173',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
}, },
'playlist_count': 0, 'playlist_count': 0,
}, { }, {
@ -2470,6 +2540,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Home', 'title': 'lex will - Home',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, { }, {
@ -2479,6 +2551,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos', 'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 975, 'playlist_mincount': 975,
}, { }, {
@ -2488,6 +2562,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Videos', 'title': 'lex will - Videos',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 199, 'playlist_mincount': 199,
}, { }, {
@ -2497,6 +2573,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Playlists', 'title': 'lex will - Playlists',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 17, 'playlist_mincount': 17,
}, { }, {
@ -2506,6 +2584,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community', 'title': 'lex will - Community',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 18, 'playlist_mincount': 18,
}, { }, {
@ -2515,8 +2595,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels', 'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will',
'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 138, 'playlist_mincount': 75,
}, { }, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
@ -2533,7 +2615,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': '29C3: Not my department', 'title': '29C3: Not my department',
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'uploader': 'Christiaan008', 'uploader': 'Christiaan008',
'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', 'uploader_id': '@ChRiStIaAn008',
'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
}, },
'playlist_count': 96, 'playlist_count': 96,
}, { }, {
@ -2543,7 +2626,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': 'Uploads from Cauchemar', 'title': 'Uploads from Cauchemar',
'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
'uploader': 'Cauchemar', 'uploader': 'Cauchemar',
'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', 'uploader_id': '@Cauchemar89',
'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
}, },
'playlist_mincount': 1123, 'playlist_mincount': 1123,
}, { }, {
@ -2557,7 +2641,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': 'Uploads from Interstellar Movie', 'title': 'Uploads from Interstellar Movie',
'id': 'UUXw-G3eDE9trcvY2sBMM_aA', 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
'uploader': 'Interstellar Movie', 'uploader': 'Interstellar Movie',
'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', 'uploader_id': '@InterstellarMovie',
'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
@ -2566,8 +2651,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'info_dict': { 'info_dict': {
'title': 'Data Analysis with Dr Mike Pound', 'title': 'Data Analysis with Dr Mike Pound',
'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
'uploader': 'Computerphile', 'uploader': 'Computerphile',
'uploader_id': '@Computerphile',
'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
@ -2605,14 +2691,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': { 'info_dict': {
'id': '9Auq9mYxFEE', 'id': r're:[\da-zA-Z_-]{8,}',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Watch Sky News live', 'title': r're:(?s)[A-Z].{20,}',
'uploader': 'Sky News', 'uploader': 'Sky News',
'uploader_id': 'skynews', 'uploader_id': '@SkyNews',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@SkyNews',
'upload_date': '20191102', 'upload_date': r're:\d{8}',
'description': 'md5:78de4e1c2359d0ea3ed829678e38b662', 'description': r're:(?s)(?:.*\n)+SUBSCRIBE to our YouTube channel for more videos: http://www\.youtube\.com/skynews *\n.*',
'categories': ['News & Politics'], 'categories': ['News & Politics'],
'tags': list, 'tags': list,
'like_count': int, 'like_count': int,
@ -2701,34 +2787,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, { }, {
'note': 'Search tab', 'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40, 'playlist_mincount': 20,
'info_dict': { 'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra', 'title': '3Blue1Brown - Search - linear algebra',
'description': 'md5:e1384e8a133307dd10edee76e875d62f', 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
'uploader': '3Blue1Brown', 'uploader': '3Blue1Brown',
'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', 'uploader_id': '@3blue1brown',
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
} }
}] }]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if YoutubeIE.suitable(url) else super( return not YoutubeIE.suitable(url) and super(
YoutubeTabIE, cls).suitable(url) YoutubeTabIE, cls).suitable(url)
def _extract_channel_id(self, webpage):
channel_id = self._html_search_meta(
'channelId', webpage, 'channel id', default=None)
if channel_id:
return channel_id
channel_url = self._html_search_meta(
('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
'twitter:app:url:googleplay'), webpage, 'channel url')
return self._search_regex(
r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
channel_url, 'channel id')
@staticmethod @staticmethod
def _extract_grid_item_renderer(item): def _extract_grid_item_renderer(item):
assert isinstance(item, dict) assert isinstance(item, dict)
@ -3116,27 +3190,18 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
else: else:
raise ExtractorError('Unable to find selected tab') raise ExtractorError('Unable to find selected tab')
@staticmethod def _extract_uploader(self, metadata, data):
def _extract_uploader(data):
uploader = {} uploader = {}
sidebar_renderer = try_get( renderers = traverse_obj(data,
data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) ('sidebar', 'playlistSidebarRenderer', 'items'))
if sidebar_renderer: uploader['channel_id'] = self._extract_channel_id('', metadata=metadata, renderers=renderers)
for item in sidebar_renderer: uploader['uploader'] = (
if not isinstance(item, dict): self._extract_author_var('', 'name', renderers=renderers)
continue or self._extract_author_var('', 'name', metadata=metadata))
renderer = item.get('playlistSidebarSecondaryInfoRenderer') uploader['uploader_url'] = self._yt_urljoin(
if not isinstance(renderer, dict): self._extract_author_var('', 'url', metadata=metadata, renderers=renderers))
continue uploader['uploader_id'] = self._extract_uploader_id(uploader['uploader_url'])
owner = try_get( uploader['channel'] = uploader['uploader']
renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
if owner:
uploader['uploader'] = owner.get('text')
uploader['uploader_id'] = try_get(
owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
uploader['uploader_url'] = urljoin(
'https://www.youtube.com/',
try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
return uploader return uploader
@staticmethod @staticmethod
@ -3187,8 +3252,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
self._entries(selected_tab, item_id, webpage), self._entries(selected_tab, item_id, webpage),
playlist_id=playlist_id, playlist_title=title, playlist_id=playlist_id, playlist_title=title,
playlist_description=description) playlist_description=description)
playlist.update(self._extract_uploader(data)) return merge_dicts(playlist, self._extract_uploader(renderer, data))
return playlist
def _extract_from_playlist(self, item_id, url, data, playlist): def _extract_from_playlist(self, item_id, url, data, playlist):
title = playlist.get('title') or try_get( title = playlist.get('title') or try_get(
@ -3275,8 +3339,9 @@ class YoutubePlaylistIE(InfoExtractor):
'info_dict': { 'info_dict': {
'title': '[OLD]Team Fortress 2 (Class-based LP)', 'title': '[OLD]Team Fortress 2 (Class-based LP)',
'id': 'PLBB231211A4F62143', 'id': 'PLBB231211A4F62143',
'uploader': 'Wickydoo', 'uploader': 'Wickman',
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', 'uploader_id': '@WickmanVT',
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
}, },
'playlist_mincount': 29, 'playlist_mincount': 29,
}, { }, {
@ -3290,21 +3355,25 @@ class YoutubePlaylistIE(InfoExtractor):
}, { }, {
'note': 'embedded', 'note': 'embedded',
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
'playlist_count': 4, # TODO: full playlist requires _reload_with_unavailable_videos()
# 'playlist_count': 4,
'playlist_mincount': 1,
'info_dict': { 'info_dict': {
'title': 'JODA15', 'title': 'JODA15',
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
'uploader': 'milan', 'uploader': 'milan',
'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw', 'uploader_id': '@milan5503',
'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
} }
}, { }, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 982, 'playlist_mincount': 455,
'info_dict': { 'info_dict': {
'title': '2018 Chinese New Singles (11/6 updated)', 'title': '2018 Chinese New Singles (11/6 updated)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'uploader': 'LBK', 'uploader': 'LBK',
'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA', 'uploader_id': '@music_king',
'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
} }
}, { }, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw', 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
@ -3342,8 +3411,8 @@ class YoutubeYtBeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Small Scale Baler and Braiding Rugs', 'title': 'Small Scale Baler and Braiding Rugs',
'uploader': 'Backus-Page House Museum', 'uploader': 'Backus-Page House Museum',
'uploader_id': 'backuspagemuseum', 'uploader_id': '@backuspagemuseum',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
'upload_date': '20161008', 'upload_date': '20161008',
'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a', 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
'categories': ['Nonprofits & Activism'], 'categories': ['Nonprofits & Activism'],