2013-06-24 02:58:33 +09:00
# coding: utf-8
2014-09-13 14:51:06 +09:00
from __future__ import unicode_literals
2024-01-16 03:34:21 +09:00
import collections
2013-09-22 17:37:23 +09:00
import itertools
2013-06-24 02:58:33 +09:00
import json
2013-09-22 07:35:03 +09:00
import os . path
2016-03-01 04:01:33 +09:00
import random
2013-06-24 02:58:33 +09:00
import re
2013-09-21 21:19:30 +09:00
import traceback
2013-06-24 02:58:33 +09:00
2013-06-24 03:28:15 +09:00
from . common import InfoExtractor , SearchInfoExtractor
2014-12-11 18:08:17 +09:00
from . . compat import (
2013-09-22 17:30:02 +09:00
compat_chr ,
2020-12-29 04:11:48 +09:00
compat_HTTPError ,
2021-11-01 22:34:29 +09:00
compat_map as map ,
2021-02-01 22:30:59 +09:00
compat_str ,
2023-02-07 01:19:21 +09:00
compat_urllib_parse ,
compat_urllib_parse_parse_qs as compat_parse_qs ,
2015-07-18 02:51:57 +09:00
compat_urllib_parse_unquote_plus ,
2015-07-21 04:10:28 +09:00
compat_urllib_parse_urlparse ,
2023-04-12 01:36:27 +09:00
compat_zip as zip ,
2014-12-11 18:08:17 +09:00
)
2021-02-01 22:30:59 +09:00
from . . jsinterp import JSInterpreter
2014-12-11 18:08:17 +09:00
from . . utils import (
2021-02-01 22:30:59 +09:00
clean_html ,
2021-03-25 20:53:18 +09:00
dict_get ,
2022-02-02 08:22:57 +09:00
error_to_compat_str ,
2024-01-16 03:34:21 +09:00
ExtractorError ,
2015-02-12 02:39:31 +09:00
float_or_none ,
2023-02-09 03:16:51 +09:00
extract_attributes ,
get_element_by_attribute ,
2014-01-19 13:47:20 +09:00
int_or_none ,
2023-04-24 06:58:35 +09:00
join_nonempty ,
2022-02-01 23:39:03 +09:00
js_to_json ,
2023-03-12 18:16:09 +09:00
LazyList ,
2023-02-24 11:48:37 +09:00
merge_dicts ,
2016-01-25 02:02:19 +09:00
mimetype2ext ,
2024-01-16 03:34:21 +09:00
NO_DEFAULT ,
2017-02-12 20:09:53 +09:00
parse_codecs ,
2024-01-16 03:38:43 +09:00
parse_count ,
2015-07-21 04:10:28 +09:00
parse_duration ,
2023-02-07 01:19:21 +09:00
parse_qs ,
2021-02-01 22:30:59 +09:00
qualities ,
2017-05-07 06:19:11 +09:00
remove_start ,
2015-07-26 00:30:34 +09:00
smuggle_url ,
2018-11-03 08:26:16 +09:00
str_or_none ,
2015-06-29 03:48:06 +09:00
str_to_int ,
2024-01-16 03:38:43 +09:00
T ,
2023-02-09 03:16:51 +09:00
traverse_obj ,
2024-01-16 03:34:21 +09:00
try_call ,
2017-01-26 23:43:14 +09:00
try_get ,
2023-04-24 06:58:35 +09:00
txt_or_none ,
2013-06-24 02:58:33 +09:00
unescapeHTML ,
unified_strdate ,
2015-07-26 00:30:34 +09:00
unsmuggle_url ,
2022-01-31 13:28:54 +09:00
update_url ,
2020-11-18 05:32:42 +09:00
update_url_query ,
2018-12-16 21:35:48 +09:00
url_or_none ,
2016-03-26 05:19:24 +09:00
urlencode_postdata ,
2020-11-12 08:16:37 +09:00
urljoin ,
2013-06-24 02:58:33 +09:00
)
2014-11-24 04:41:03 +09:00
2013-09-11 22:48:23 +09:00
class YoutubeBaseInfoExtractor ( InfoExtractor ) :
2013-07-25 03:40:12 +09:00
""" Provide base functions for Youtube extractors """
_LOGIN_URL = ' https://accounts.google.com/ServiceLogin '
2015-08-14 12:11:11 +09:00
_TWOFACTOR_URL = ' https://accounts.google.com/signin/challenge '
2017-05-07 01:58:47 +09:00
_LOOKUP_URL = ' https://accounts.google.com/_/signin/sl/lookup '
2017-05-07 06:19:11 +09:00
_CHALLENGE_URL = ' https://accounts.google.com/_/signin/sl/challenge '
_TFA_URL = ' https://accounts.google.com/_/signin/challenge?hl=en&TL= {0} '
2017-05-07 01:58:47 +09:00
2013-07-25 03:40:12 +09:00
_NETRC_MACHINE = ' youtube '
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
2020-11-18 05:32:42 +09:00
_PLAYLIST_ID_RE = r ' (?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_] { 10,}|RDMM) '
2017-03-25 03:17:17 +09:00
2013-07-25 03:40:12 +09:00
def _login(self):
    """
    Attempt to log in to YouTube.

    Returns True if login succeeded or was skipped because no username
    is configured, and False if any step of the login flow failed.
    Raises ExtractorError if _LOGIN_REQUIRED is set and neither login
    info nor a cookie file was provided.
    """
    username, password = self._get_login_info()
    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
            raise ExtractorError(
                'No login info available, needed for using %s.' % self.IE_NAME,
                expected=True)
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        # Explicit False (was a bare return) so the result matches the docstring
        return False

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        # POST the hidden login form fields plus the JSON-encoded request
        # list; yields the parsed JSON reply, or False on failure (fatal=False).
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything preceding the first '[' before JSON parsing
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    def warn(message):
        self._downloader.report_warning(message)

    # The same sign-in URL is embedded in both the lookup and the challenge request
    signin_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1, signin_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')
    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, signin_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')
    if challenge_results is False:
        # Explicit False (was a bare return) so the result matches the docstring
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised so the prefix applies to both alternatives; without
        # the parentheses '%' bound tighter than the conditional expression
        # and other messages were emitted without the prefix.
        warn('Unable to login: %s' % (
            'Invalid password'
            if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')
            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                    ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')
            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password challenge above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code'
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)
    if check_cookie_results is False:
        return False

    # The login only counts as successful if this URL appears in the CheckCookie page
    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
2021-04-01 06:05:10 +09:00
def _initialize_consent ( self ) :
cookies = self . _get_cookies ( ' https://www.youtube.com/ ' )
if cookies . get ( ' __Secure-3PSID ' ) :
return
2023-11-16 08:54:31 +09:00
socs = cookies . get ( ' SOCS ' )
if socs and not socs . value . startswith ( ' CAA ' ) : # not consented
return
self . _set_cookie ( ' .youtube.com ' , ' SOCS ' , ' CAI ' , secure = True ) # accept all (required for mixes)
2021-04-01 06:05:10 +09:00
2013-07-25 03:40:12 +09:00
def _real_initialize ( self ) :
2021-04-01 06:05:10 +09:00
self . _initialize_consent ( )
2013-07-25 03:40:12 +09:00
if self . _downloader is None :
return
if not self . _login ( ) :
return
2013-06-24 02:58:33 +09:00
2020-11-12 08:16:37 +09:00
_DEFAULT_API_DATA = {
' context ' : {
' client ' : {
' clientName ' : ' WEB ' ,
' clientVersion ' : ' 2.20201021.03.00 ' ,
}
} ,
}
2013-08-08 15:54:10 +09:00
2020-11-21 01:21:52 +09:00
_YT_INITIAL_DATA_RE = r ' (?:window \ s* \ [ \ s*[ " \' ]ytInitialData[ " \' ] \ s* \ ]|ytInitialData) \ s*= \ s*( { .+?}) \ s*; '
2020-11-28 17:02:31 +09:00
_YT_INITIAL_PLAYER_RESPONSE_RE = r ' ytInitialPlayerResponse \ s*= \ s*( { .+?}) \ s*; '
2020-12-29 04:29:34 +09:00
_YT_INITIAL_BOUNDARY_RE = r ' (?:var \ s+meta|</script| \ n) '
2020-11-21 01:21:52 +09:00
2022-01-31 13:28:54 +09:00
def _call_api ( self , ep , query , video_id , fatal = True , headers = None ) :
2020-11-12 08:16:37 +09:00
data = self . _DEFAULT_API_DATA . copy ( )
data . update ( query )
2022-01-31 13:28:54 +09:00
real_headers = { ' content-type ' : ' application/json ' }
if headers :
real_headers . update ( headers )
2015-11-22 08:01:01 +09:00
2021-02-01 22:30:59 +09:00
return self . _download_json (
2020-11-12 08:16:37 +09:00
' https://www.youtube.com/youtubei/v1/ %s ' % ep , video_id = video_id ,
note = ' Downloading API JSON ' , errnote = ' Unable to download API page ' ,
2021-02-01 22:30:59 +09:00
data = json . dumps ( data ) . encode ( ' utf8 ' ) , fatal = fatal ,
2022-01-31 13:28:54 +09:00
headers = real_headers ,
2020-11-12 08:16:37 +09:00
query = { ' key ' : ' AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8 ' } )
2015-11-22 08:01:01 +09:00
2020-11-12 08:16:37 +09:00
def _extract_yt_initial_data(self, video_id, webpage):
    """Locate the ytInitialData JSON object in `webpage` and parse it.

    The regex anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
    the bare _YT_INITIAL_DATA_RE as a fallback.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
2015-11-22 07:17:07 +09:00
2020-12-20 02:48:44 +09:00
def _extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ytcfg.set(...) in `webpage` as a dict.

    Falls back to an empty dict when the call is not found or its
    argument cannot be parsed (parsing is non-fatal).
    """
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False) or {}
2020-12-20 02:48:44 +09:00
2021-01-12 01:35:13 +09:00
def _extract_video(self, renderer):
    """Build a 'url'-type result dict from a video renderer dict.

    `renderer` must contain 'videoId' (KeyError otherwise); every other
    field is looked up defensively and is None when absent.
    """
    video_id = renderer['videoId']

    title = try_get(
        renderer,
        (lambda x: x['title']['runs'][0]['text'],
         lambda x: x['title']['simpleText'],
         lambda x: x['headline']['simpleText']), compat_str)
    description = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
        compat_str)
    duration = parse_duration(try_get(
        renderer, lambda x: x['lengthText']['simpleText'], compat_str))

    # Strip all whitespace, then keep the leading run of digits and commas
    view_count_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None))

    uploader = try_get(
        renderer,
        (lambda x: x['ownerText']['runs'][0]['text'],
         lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
2022-01-30 04:11:47 +09:00
def _search_results(self, query, params):
    """Yield one result dict per video found by the search API for `query`.

    Pages are requested until a download fails, a page has no result
    list, or no continuation token is found. `params`, when truthy, is
    sent along as the 'params' field of the request.
    """
    data = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
        'query': query,
    }
    if params:
        data['params'] = params

    for page_num in itertools.count(1):
        search = self._download_json(
            'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            video_id='query "%s"' % query,
            note='Downloading page %s' % page_num,
            errnote='Unable to download API page', fatal=False,
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'})
        if not search:
            break

        # First alternative is the initial response layout, second the
        # layout of continuation responses
        slr_contents = try_get(
            search,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        if not slr_contents:
            break

        for slr_content in slr_contents:
            isr_contents = try_get(
                slr_content,
                lambda x: x['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                continue
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                # Skip anything that is not a video entry with an ID
                if not isinstance(video, dict) or not video.get('videoId'):
                    continue
                yield self._extract_video(video)

        # The continuation token, if any, sits in the last item of the page
        token = try_get(
            slr_contents,
            lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
            compat_str)
        if not token:
            break
        data['continuation'] = token
2023-02-24 11:48:37 +09:00
@staticmethod
def _owner_endpoints_path ( ) :
return [
Ellipsis ,
lambda k , _ : k . endswith ( ' SecondaryInfoRenderer ' ) ,
( ' owner ' , ' videoOwner ' ) , ' videoOwnerRenderer ' , ' title ' ,
' runs ' , Ellipsis ]
def _extract_channel_id(self, webpage, videodetails=None, metadata=None, renderers=None):
    """Return the channel ID, or None if it cannot be found.

    Sources are tried in order: `videodetails`, `metadata`, the owner
    endpoint inside `renderers`, and finally the 'channelId' meta tag
    of `webpage`.
    """
    # None defaults replace shared mutable defaults; omitted arguments
    # still behave as empty containers
    videodetails = {} if videodetails is None else videodetails
    metadata = {} if metadata is None else metadata
    renderers = [] if renderers is None else renderers

    channel_id = None
    if any((videodetails, metadata, renderers)):
        channel_id = (
            traverse_obj(videodetails, 'channelId')
            or traverse_obj(metadata, 'externalChannelId', 'externalId')
            or traverse_obj(
                renderers,
                self._owner_endpoints_path() + [
                    'navigationEndpoint', 'browseEndpoint', 'browseId'],
                get_all=False)
        )
    return channel_id or self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
def _extract_author_var(self, webpage, var_name,
                        videodetails=None, metadata=None, renderers=None):
    """Return the author's 'name' or 'url', selected by `var_name`.

    Sources are tried in order: `videodetails`, `metadata`, the owner
    endpoint inside `renderers`, and finally the matching
    <link itemprop=...> tag within the itemprop="author" element of
    `webpage`. A `var_name` other than 'name' or 'url' raises KeyError.
    """
    # None defaults replace shared mutable defaults; omitted arguments
    # still behave as empty containers
    videodetails = {} if videodetails is None else videodetails
    metadata = {} if metadata is None else metadata
    renderers = [] if renderers is None else renderers

    result = None
    paths = {
        #       (HTML, videodetails, metadata, renderers)
        'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
        'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
                ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
    }
    if any((videodetails, metadata, renderers)):
        result = (
            traverse_obj(videodetails, paths[var_name][1], get_all=False)
            or traverse_obj(metadata, paths[var_name][2], get_all=False)
            or traverse_obj(
                renderers,
                self._owner_endpoints_path() + paths[var_name][3],
                get_all=False)
        )
    # Last resort: read the attribute from the HTML author markup
    return result or traverse_obj(
        extract_attributes(self._search_regex(
            r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
            % re.escape(var_name),
            get_element_by_attribute('itemprop', 'author', webpage or '') or '',
            'author link', default='')),
        paths[var_name][0])
@staticmethod
def _yt_urljoin(url_or_path):
    """Resolve `url_or_path` against https://www.youtube.com using urljoin."""
    return urljoin('https://www.youtube.com', url_or_path)
def _extract_uploader_id ( self , uploader_url ) :
return self . _search_regex (
r ' /(?:(?:channel|user)/|(?=@))([^/?&#]+) ' , uploader_url or ' ' ,
' uploader id ' , default = None )
2015-11-22 07:17:07 +09:00
2015-02-17 05:44:17 +09:00
class YoutubeIE ( YoutubeBaseInfoExtractor ) :
2014-09-13 14:51:06 +09:00
IE_DESC = ' YouTube.com '
2021-02-18 06:59:56 +09:00
_INVIDIOUS_SITES = (
# invidious-redirect websites
r ' (?:www \ .)?redirect \ .invidious \ .io ' ,
r ' (?:(?:www|dev) \ .)?invidio \ .us ' ,
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
r ' (?:(?:www|no) \ .)?invidiou \ .sh ' ,
r ' (?:(?:www|fi) \ .)?invidious \ .snopyta \ .org ' ,
r ' (?:www \ .)?invidious \ .kabi \ .tk ' ,
r ' (?:www \ .)?invidious \ .13ad \ .de ' ,
r ' (?:www \ .)?invidious \ .mastodon \ .host ' ,
r ' (?:www \ .)?invidious \ .zapashcanon \ .fr ' ,
2021-06-21 02:42:09 +09:00
r ' (?:www \ .)?(?:invidious(?:-us)?|piped) \ .kavin \ .rocks ' ,
2021-04-17 02:31:34 +09:00
r ' (?:www \ .)?invidious \ .tinfoil-hat \ .net ' ,
r ' (?:www \ .)?invidious \ .himiko \ .cloud ' ,
r ' (?:www \ .)?invidious \ .reallyancient \ .tech ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?invidious \ .tube ' ,
r ' (?:www \ .)?invidiou \ .site ' ,
r ' (?:www \ .)?invidious \ .site ' ,
r ' (?:www \ .)?invidious \ .xyz ' ,
r ' (?:www \ .)?invidious \ .nixnet \ .xyz ' ,
2021-04-17 02:31:34 +09:00
r ' (?:www \ .)?invidious \ .048596 \ .xyz ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?invidious \ .drycat \ .fr ' ,
2021-04-17 02:31:34 +09:00
r ' (?:www \ .)?inv \ .skyn3t \ .in ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?tube \ .poal \ .co ' ,
r ' (?:www \ .)?tube \ .connect \ .cafe ' ,
r ' (?:www \ .)?vid \ .wxzm \ .sx ' ,
r ' (?:www \ .)?vid \ .mint \ .lgbt ' ,
2021-04-17 02:31:34 +09:00
r ' (?:www \ .)?vid \ .puffyan \ .us ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?yewtu \ .be ' ,
r ' (?:www \ .)?yt \ .elukerio \ .org ' ,
r ' (?:www \ .)?yt \ .lelux \ .fi ' ,
r ' (?:www \ .)?invidious \ .ggc-project \ .de ' ,
r ' (?:www \ .)?yt \ .maisputain \ .ovh ' ,
2021-04-17 02:31:34 +09:00
r ' (?:www \ .)?ytprivate \ .com ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?invidious \ .13ad \ .de ' ,
r ' (?:www \ .)?invidious \ .toot \ .koeln ' ,
r ' (?:www \ .)?invidious \ .fdn \ .fr ' ,
r ' (?:www \ .)?watch \ .nettohikari \ .com ' ,
2021-06-21 02:42:09 +09:00
r ' (?:www \ .)?invidious \ .namazso \ .eu ' ,
r ' (?:www \ .)?invidious \ .silkky \ .cloud ' ,
r ' (?:www \ .)?invidious \ .exonip \ .de ' ,
r ' (?:www \ .)?invidious \ .riverside \ .rocks ' ,
r ' (?:www \ .)?invidious \ .blamefran \ .net ' ,
r ' (?:www \ .)?invidious \ .moomoo \ .de ' ,
r ' (?:www \ .)?ytb \ .trom \ .tf ' ,
r ' (?:www \ .)?yt \ .cyberhost \ .uk ' ,
2021-02-18 06:59:56 +09:00
r ' (?:www \ .)?kgg2m7yk5aybusll \ .onion ' ,
r ' (?:www \ .)?qklhadlycap4cnod \ .onion ' ,
r ' (?:www \ .)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid \ .onion ' ,
r ' (?:www \ .)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid \ .onion ' ,
r ' (?:www \ .)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad \ .onion ' ,
r ' (?:www \ .)?invidious \ .l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd \ .onion ' ,
r ' (?:www \ .)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya \ .b32 \ .i2p ' ,
r ' (?:www \ .)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd \ .onion ' ,
2021-06-21 02:42:09 +09:00
r ' (?:www \ .)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd \ .onion ' ,
r ' (?:www \ .)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad \ .onion ' ,
r ' (?:www \ .)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad \ .onion ' ,
r ' (?:www \ .)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid \ .onion ' ,
2021-02-18 06:59:56 +09:00
)
2013-11-19 00:42:35 +09:00
_VALID_URL = r """ (?x)^
2013-06-24 02:58:33 +09:00
(
2014-09-12 04:47:25 +09:00
( ? : https ? : / / | / / ) # http(s):// or protocol-independent URL
2021-02-18 06:59:56 +09:00
( ? : ( ? : ( ? : ( ? : \w + \. ) ? [ yY ] [ oO ] [ uU ] [ tT ] [ uU ] [ bB ] [ eE ] ( ? : - nocookie | kids ) ? \. com |
( ? : www \. ) ? deturl \. com / www \. youtube \. com |
( ? : www \. ) ? pwnyoutube \. com |
( ? : www \. ) ? hooktube \. com |
( ? : www \. ) ? yourepeat \. com |
tube \. majestyc \. net |
% ( invidious ) s |
youtube \. googleapis \. com ) / # the various hostnames, with wildcard subdomains
2013-06-24 02:58:33 +09:00
( ? : . * ? \#/)? # handle anchor (#/) redirect urls
( ? : # the various things that can precede the ID:
2014-09-24 17:34:29 +09:00
( ? : ( ? : v | embed | e ) / ( ? ! videoseries ) ) # v/ or embed/ or e/
2022-01-31 09:02:56 +09:00
| shorts /
2013-06-24 02:58:33 +09:00
| ( ? : # or the v= param in all its forms
2014-02-19 04:00:54 +09:00
( ? : ( ? : watch | movie ) ( ? : _popup ) ? ( ? : \. php ) ? / ? ) ? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
2013-06-24 02:58:33 +09:00
( ? : \? | \#!?) # the params delimiter ? or # or #!
2015-11-30 00:01:59 +09:00
( ? : . * ? [ & ; ] ) ? ? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
2013-06-24 02:58:33 +09:00
v =
)
2013-09-06 05:38:23 +09:00
) )
2015-08-17 05:04:13 +09:00
| ( ? :
youtu \. be | # just youtu.be/xxxx
2016-04-04 05:26:20 +09:00
vid \. plus | # or vid.plus/xxxx
zwearz \. com / watch | # or zwearz.com/watch/xxxx
2021-02-18 06:59:56 +09:00
% ( invidious ) s
2015-08-17 05:04:13 +09:00
) /
2014-09-12 04:47:25 +09:00
| ( ? : www \. ) ? cleanvideosearch \. com / media / action / yt / watch \? videoId =
2013-09-06 05:38:23 +09:00
)
2013-06-24 02:58:33 +09:00
) ? # all until now is optional -> you can pass the naked ID
2021-04-17 02:07:32 +09:00
( ? P < id > [ 0 - 9 A - Za - z_ - ] { 11 } ) # here is it! the YouTube video ID
2013-06-24 02:58:33 +09:00
( ? ( 1 ) . + ) ? # if we found the ID, everything can follow
2021-02-18 06:59:56 +09:00
$ """ % {
' invidious ' : ' | ' . join ( _INVIDIOUS_SITES ) ,
}
2020-05-02 09:18:08 +09:00
_PLAYER_INFO_RE = (
2021-02-08 17:20:28 +09:00
r ' /s/player/(?P<id>[a-zA-Z0-9_-] { 8,})/player ' ,
r ' /(?P<id>[a-zA-Z0-9_-] { 8,})/player(?:_ias \ .vflset(?:/[a-zA-Z] { 2,3}_[a-zA-Z] { 2,3})?|-plasma-ias-(?:phone|tablet)-[a-z] {2} _[A-Z] {2} \ .vflset)/base \ .js$ ' ,
2021-02-01 22:30:59 +09:00
r ' \ b(?P<id>vfl[a-zA-Z0-9_-]+) \ b.*? \ .js$ ' ,
2020-05-02 09:18:08 +09:00
)
2022-06-09 23:25:23 +09:00
_SUBTITLE_FORMATS = ( ' json3 ' , ' srv1 ' , ' srv2 ' , ' srv3 ' , ' ttml ' , ' vtt ' )
2013-08-20 10:22:25 +09:00
2017-02-26 18:51:21 +09:00
_GEO_BYPASS = False
2014-09-13 14:51:06 +09:00
IE_NAME = ' youtube '
2013-06-28 02:13:11 +09:00
_TESTS = [
{
2016-09-17 23:48:20 +09:00
' url ' : ' https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9 ' ,
2014-09-24 16:49:53 +09:00
' info_dict ' : {
' id ' : ' BaW_jenozKc ' ,
' ext ' : ' mp4 ' ,
' title ' : ' youtube-dl test video " \' / \\ ä↭𝕐 ' ,
' uploader ' : ' Philipp Hagemeister ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @PhilippHagemeister ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@PhilippHagemeister ' ,
2022-01-31 13:28:54 +09:00
' channel ' : ' Philipp Hagemeister ' ,
2018-09-15 03:24:26 +09:00
' channel_id ' : ' UCLqxVugv74EIW3VWh2NOa3Q ' ,
' channel_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCLqxVugv74EIW3VWh2NOa3Q ' ,
2014-09-24 16:49:53 +09:00
' upload_date ' : ' 20121002 ' ,
' description ' : ' test chars: " \' / \\ ä↭𝕐 \n test URL: https://github.com/rg3/youtube-dl/issues/1892 \n \n This is a test video for youtube-dl. \n \n For more information, contact phihag@phihag.de . ' ,
' categories ' : [ ' Science & Technology ' ] ,
2015-07-29 06:43:32 +09:00
' tags ' : [ ' youtube-dl ' ] ,
2017-01-26 23:43:14 +09:00
' duration ' : 10 ,
2018-11-03 08:26:16 +09:00
' view_count ' : int ,
2014-09-01 01:10:05 +09:00
' like_count ' : int ,
2022-01-31 13:28:54 +09:00
' thumbnail ' : ' https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg ' ,
2015-07-21 04:10:28 +09:00
' start_time ' : 1 ,
2015-07-23 20:20:21 +09:00
' end_time ' : 9 ,
2022-01-31 13:28:54 +09:00
} ,
2013-06-28 02:55:39 +09:00
} ,
2013-11-18 21:05:18 +09:00
{
2014-09-24 16:49:53 +09:00
' url ' : ' //www.YouTube.com/watch?v=yZIXLfi8CZQ ' ,
' note ' : ' Embed-only video (#1746) ' ,
' info_dict ' : {
' id ' : ' yZIXLfi8CZQ ' ,
' ext ' : ' mp4 ' ,
' upload_date ' : ' 20120608 ' ,
' title ' : ' Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012 ' ,
' description ' : ' md5:09b78bd971f1e3e289601dfba15ca4f7 ' ,
' uploader ' : ' SET India ' ,
2015-11-24 00:35:23 +09:00
' uploader_id ' : ' setindia ' ,
2017-01-02 21:08:07 +09:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/setindia ' ,
2015-11-24 00:35:23 +09:00
' age_limit ' : 18 ,
2021-02-01 22:30:59 +09:00
} ,
' skip ' : ' Private video ' ,
2013-11-18 21:05:18 +09:00
} ,
2015-08-11 03:52:38 +09:00
{
2020-11-16 23:03:56 +09:00
' url ' : ' https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ ' ,
2015-08-11 03:52:38 +09:00
' note ' : ' Use the first video ID in the URL ' ,
' info_dict ' : {
' id ' : ' BaW_jenozKc ' ,
' ext ' : ' mp4 ' ,
' title ' : ' youtube-dl test video " \' / \\ ä↭𝕐 ' ,
' uploader ' : ' Philipp Hagemeister ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @PhilippHagemeister ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@PhilippHagemeister ' ,
2015-08-11 03:52:38 +09:00
' upload_date ' : ' 20121002 ' ,
' description ' : ' test chars: " \' / \\ ä↭𝕐 \n test URL: https://github.com/rg3/youtube-dl/issues/1892 \n \n This is a test video for youtube-dl. \n \n For more information, contact phihag@phihag.de . ' ,
' categories ' : [ ' Science & Technology ' ] ,
' tags ' : [ ' youtube-dl ' ] ,
2017-01-26 23:43:14 +09:00
' duration ' : 10 ,
2018-11-03 08:26:16 +09:00
' view_count ' : int ,
2015-08-11 03:52:38 +09:00
' like_count ' : int ,
2015-08-11 04:22:06 +09:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
2015-08-11 03:52:38 +09:00
} ,
2014-01-19 13:47:20 +09:00
{
2016-09-17 23:48:20 +09:00
' url ' : ' https://www.youtube.com/watch?v=a9LDPn-MO4I ' ,
2014-09-24 16:49:53 +09:00
' note ' : ' 256k DASH audio (format 141) via DASH manifest ' ,
' info_dict ' : {
' id ' : ' a9LDPn-MO4I ' ,
' ext ' : ' m4a ' ,
' upload_date ' : ' 20121002 ' ,
' uploader_id ' : ' 8KVIDEO ' ,
2017-01-02 21:08:07 +09:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/8KVIDEO ' ,
2014-09-24 16:49:53 +09:00
' description ' : ' ' ,
' uploader ' : ' 8KVIDEO ' ,
' title ' : ' UHDTV TEST 8K VIDEO.mp4 '
2014-01-23 05:56:37 +09:00
} ,
2014-09-24 16:49:53 +09:00
' params ' : {
' youtube_include_dash_manifest ' : True ,
' format ' : ' 141 ' ,
2014-01-23 05:56:37 +09:00
} ,
2023-02-24 11:48:37 +09:00
' skip ' : ' format 141 not served any more ' ,
2014-01-19 13:47:20 +09:00
} ,
2014-02-21 23:15:58 +09:00
# DASH manifest with encrypted signature
{
2014-09-13 14:51:06 +09:00
' url ' : ' https://www.youtube.com/watch?v=IB3lcPjvWLA ' ,
' info_dict ' : {
' id ' : ' IB3lcPjvWLA ' ,
' ext ' : ' m4a ' ,
2019-01-16 04:18:27 +09:00
' title ' : ' Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson ' ,
' description ' : ' md5:8f5e2b82460520b619ccac1f509d43bf ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 244 ,
2014-09-13 14:51:06 +09:00
' uploader ' : ' AfrojackVEVO ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @AfrojackVEVO ' ,
2014-09-13 14:51:06 +09:00
' upload_date ' : ' 20131011 ' ,
2021-02-07 03:54:46 +09:00
' abr ' : 129.495 ,
2014-02-21 23:15:58 +09:00
} ,
2014-09-24 16:49:53 +09:00
' params ' : {
2014-09-13 14:51:06 +09:00
' youtube_include_dash_manifest ' : True ,
2016-06-25 00:27:55 +09:00
' format ' : ' 141/bestaudio[ext=m4a] ' ,
2014-02-21 23:15:58 +09:00
} ,
} ,
2014-11-23 17:59:02 +09:00
# Controversy video
{
' url ' : ' https://www.youtube.com/watch?v=T4XJQO3qol8 ' ,
' info_dict ' : {
' id ' : ' T4XJQO3qol8 ' ,
' ext ' : ' mp4 ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 219 ,
2014-11-23 17:59:02 +09:00
' upload_date ' : ' 20100909 ' ,
2019-01-16 04:18:27 +09:00
' uploader ' : ' Amazing Atheist ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @theamazingatheist ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@theamazingatheist ' ,
2014-11-23 17:59:02 +09:00
' title ' : ' Burning Everyone \' s Koran ' ,
2021-02-01 22:30:59 +09:00
' description ' : ' SUBSCRIBE: http://www.youtube.com/saturninefilms \r \n \r \n Even Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html ' ,
2014-11-23 17:59:02 +09:00
}
2014-12-01 05:45:49 +09:00
} ,
2022-01-31 13:28:54 +09:00
# Age-gated videos
2014-12-01 05:45:49 +09:00
{
2022-01-31 13:28:54 +09:00
' note ' : ' Age-gated video (No vevo, embed allowed) ' ,
2016-09-17 23:48:20 +09:00
' url ' : ' https://youtube.com/watch?v=HtVdAasjOgU ' ,
2014-12-01 05:45:49 +09:00
' info_dict ' : {
' id ' : ' HtVdAasjOgU ' ,
' ext ' : ' mp4 ' ,
' title ' : ' The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer ' ,
2017-01-02 21:08:07 +09:00
' description ' : r ' re:(?s). { 100,}About the Game \ n.*?The Witcher 3: Wild Hunt. { 100,} ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 142 ,
2014-12-01 05:45:49 +09:00
' uploader ' : ' The Witcher ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @thewitcher ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@thewitcher ' ,
2014-12-01 05:45:49 +09:00
' upload_date ' : ' 20140605 ' ,
2022-01-31 13:28:54 +09:00
' thumbnail ' : ' https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg ' ,
' age_limit ' : 18 ,
' categories ' : [ ' Gaming ' ] ,
' tags ' : ' count:17 ' ,
' channel ' : ' The Witcher ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg ' ,
' channel_id ' : ' UCzybXLxv08IApdjdN0mJhEg ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Age-gated video with embed allowed in public site ' ,
' url ' : ' https://youtube.com/watch?v=HsUATh_Nc2U ' ,
' info_dict ' : {
' id ' : ' HsUATh_Nc2U ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Godzilla 2 (Official Video) ' ,
' description ' : ' md5:bf77e03fcae5529475e500129b05668a ' ,
' duration ' : 177 ,
' uploader ' : ' FlyingKitty ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @FlyingKitty900 ' ,
2022-01-31 13:28:54 +09:00
' upload_date ' : ' 20200408 ' ,
' thumbnail ' : ' https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg ' ,
2015-08-11 04:24:53 +09:00
' age_limit ' : 18 ,
2022-01-31 13:28:54 +09:00
' categories ' : [ ' Entertainment ' ] ,
' tags ' : [ ' Flyingkitty ' , ' godzilla 2 ' ] ,
' channel ' : ' FlyingKitty ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg ' ,
' channel_id ' : ' UCYQT13AtrJC0gsM1far_zJg ' ,
' view_count ' : int ,
' like_count ' : int ,
2014-12-01 05:45:49 +09:00
} ,
} ,
2020-12-01 02:49:03 +09:00
{
2023-02-07 01:19:21 +09:00
' note ' : ' Age-gated video embeddable only with clientScreen=EMBED ' ,
2022-01-31 13:28:54 +09:00
' url ' : ' https://youtube.com/watch?v=Tq92D6wQ1mg ' ,
' info_dict ' : {
' id ' : ' Tq92D6wQ1mg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' [MMD] Adios - EVERGLOW [+Motion DL] ' ,
' description ' : ' md5:17eccca93a786d51bc67646756894066 ' ,
' duration ' : 106 ,
' uploader ' : ' Projekt Melody ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @ProjektMelody ' ,
2022-01-31 13:28:54 +09:00
' upload_date ' : ' 20191227 ' ,
' age_limit ' : 18 ,
' thumbnail ' : ' https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg ' ,
' tags ' : [ ' mmd ' , ' dance ' , ' mikumikudance ' , ' kpop ' , ' vtuber ' ] ,
' categories ' : [ ' Entertainment ' ] ,
' channel ' : ' Projekt Melody ' ,
' channel_url ' : ' https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' channel_id ' : ' UC1yoRdFoFJaCY-AGfD9W0wQ ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Non-Age-gated non-embeddable video ' ,
' url ' : ' https://youtube.com/watch?v=MeJVWBSsPAY ' ,
' info_dict ' : {
' id ' : ' MeJVWBSsPAY ' ,
' ext ' : ' mp4 ' ,
' title ' : ' OOMPH! - Such Mich Find Mich (Lyrics) ' ,
' description ' : ' Fan Video. Music & Lyrics by OOMPH!. ' ,
' duration ' : 210 ,
' upload_date ' : ' 20130730 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Herr Lurik ' ,
' uploader_id ' : ' @HerrLurik ' ,
' uploader_url ' : ' http://www.youtube.com/@HerrLurik ' ,
2022-01-31 13:28:54 +09:00
' age_limit ' : 0 ,
' thumbnail ' : ' https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg ' ,
' tags ' : [ ' oomph ' , ' such mich find mich ' , ' lyrics ' , ' german industrial ' , ' musica industrial ' ] ,
' categories ' : [ ' Music ' ] ,
' channel ' : ' Herr Lurik ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA ' ,
' channel_id ' : ' UCdR3RSDPqub28LjZx0v9-aA ' ,
' artist ' : ' OOMPH! ' ,
' view_count ' : int ,
' like_count ' : int ,
} ,
} ,
{
' note ' : ' Non-bypassable age-gated video ' ,
' url ' : ' https://youtube.com/watch?v=Cr381pDsSsA ' ,
' only_matching ' : True ,
} ,
{
' note ' : ' Age-gated video only available with authentication (not via embed workaround) ' ,
2020-12-01 02:49:03 +09:00
' url ' : ' XgnwCQzjau8 ' ,
' only_matching ' : True ,
2022-01-31 13:28:54 +09:00
' skip ' : ''' This video has been removed for violating YouTube ' s Community Guidelines ''' ,
2020-12-01 02:49:03 +09:00
} ,
2019-03-09 21:14:41 +09:00
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
2017-07-15 00:37:04 +09:00
# YouTube Red ad is not captured for creator
2014-12-10 21:21:24 +09:00
{
' url ' : ' __2ABJjxzNo ' ,
' info_dict ' : {
' id ' : ' __2ABJjxzNo ' ,
' ext ' : ' mp4 ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 266 ,
2014-12-10 21:21:24 +09:00
' upload_date ' : ' 20100430 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @deadmau5 ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@deadmau5 ' ,
2021-02-01 22:30:59 +09:00
' creator ' : ' deadmau5 ' ,
' description ' : ' md5:6cbcd3a92ce1bc676fc4d6ab4ace2336 ' ,
2014-12-10 21:21:24 +09:00
' uploader ' : ' deadmau5 ' ,
' title ' : ' Deadmau5 - Some Chords (HD) ' ,
2021-02-01 22:30:59 +09:00
' alt_title ' : ' Some Chords ' ,
2014-12-10 21:21:24 +09:00
} ,
' expected_warnings ' : [
' DASH manifest missing ' ,
]
2014-12-12 00:28:07 +09:00
} ,
2019-03-09 21:14:41 +09:00
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
2014-12-12 00:28:07 +09:00
{
' url ' : ' lqQg6PlCWgI ' ,
' info_dict ' : {
' id ' : ' lqQg6PlCWgI ' ,
' ext ' : ' mp4 ' ,
2022-01-31 13:28:54 +09:00
' title ' : ' Hockey - Women - GER-AUS - London 2012 Olympic Games ' ,
' description ' : r ' re:(?s)(?:.+ \ s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games \ s* ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 6085 ,
2015-11-24 00:37:21 +09:00
' upload_date ' : ' 20150827 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @Olympics ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@Olympics ' ,
2022-01-31 13:28:54 +09:00
' uploader ' : r ' re:Olympics? ' ,
' age_limit ' : 0 ,
' thumbnail ' : ' https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg ' ,
' categories ' : [ ' Sports ' ] ,
' tags ' : [ ' Hockey ' , ' 2012-07-31 ' , ' 31 July 2012 ' , ' Riverbank Arena ' , ' Session ' , ' Olympics ' , ' Olympic Games ' , ' London 2012 ' , ' 2012 Summer Olympics ' , ' Summer Games ' ] ,
' channel ' : ' Olympics ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q ' ,
' channel_id ' : ' UCTl3QQTvqHFjurroKxexy2Q ' ,
' view_count ' : int ,
' like_count ' : int ,
2014-12-12 00:34:37 +09:00
} ,
} ,
2015-01-10 13:45:51 +09:00
# Non-square pixels
{
' url ' : ' https://www.youtube.com/watch?v=_b-2C3KPAM0 ' ,
' info_dict ' : {
' id ' : ' _b-2C3KPAM0 ' ,
' ext ' : ' mp4 ' ,
' stretched_ratio ' : 16 / 9. ,
2017-01-26 23:43:14 +09:00
' duration ' : 85 ,
2015-01-10 13:45:51 +09:00
' upload_date ' : ' 20110310 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @AllenMeow ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@AllenMeow ' ,
2015-01-10 13:45:51 +09:00
' description ' : ' made by Wacom from Korea | 字幕&加油添醋 by TY \' s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯 ' ,
2018-06-03 04:23:45 +09:00
' uploader ' : ' 孫ᄋᄅ ' ,
2015-01-10 13:45:51 +09:00
' title ' : ' [A-made] 變態妍字幕版 太妍 我就是這樣的人 ' ,
} ,
2015-04-06 03:35:55 +09:00
} ,
# url_encoded_fmt_stream_map is empty string
{
' url ' : ' qEJwOuvDf7I ' ,
' info_dict ' : {
' id ' : ' qEJwOuvDf7I ' ,
2015-08-13 00:27:58 +09:00
' ext ' : ' webm ' ,
2015-04-06 03:35:55 +09:00
' title ' : ' Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге ' ,
' description ' : ' ' ,
' upload_date ' : ' 20150404 ' ,
' uploader_id ' : ' spbelect ' ,
' uploader ' : ' Наблюдатели Петербурга ' ,
} ,
' params ' : {
' skip_download ' : ' requires avconv ' ,
2016-01-19 23:56:04 +09:00
} ,
' skip ' : ' This live event has ended. ' ,
2015-04-06 03:35:55 +09:00
} ,
2019-03-09 21:14:41 +09:00
# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
2015-06-27 17:55:46 +09:00
{
' url ' : ' https://www.youtube.com/watch?v=FIl7x6_3R5Y ' ,
' info_dict ' : {
' id ' : ' FIl7x6_3R5Y ' ,
2018-06-03 04:23:45 +09:00
' ext ' : ' webm ' ,
2015-06-27 17:55:46 +09:00
' title ' : ' md5:7b81415841e02ecd4313668cde88737a ' ,
' description ' : ' md5:116377fd2963b81ec4ce64b542173306 ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 220 ,
2015-06-27 17:55:46 +09:00
' upload_date ' : ' 20150625 ' ,
' uploader_id ' : ' dorappi2000 ' ,
2017-01-02 21:08:07 +09:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/dorappi2000 ' ,
2015-06-27 17:55:46 +09:00
' uploader ' : ' dorappi2000 ' ,
2018-06-03 04:23:45 +09:00
' formats ' : ' mincount:31 ' ,
2015-06-27 17:55:46 +09:00
} ,
2023-02-24 11:48:37 +09:00
' skip ' : ' not actual any more ' ,
2015-07-21 02:34:24 +09:00
} ,
2015-06-10 15:47:02 +09:00
# DASH manifest with segment_list
{
' url ' : ' https://www.youtube.com/embed/CsmdDsKjzN8 ' ,
' md5 ' : ' 8ce563a1d667b599d21064e982ab9e31 ' ,
' info_dict ' : {
' id ' : ' CsmdDsKjzN8 ' ,
' ext ' : ' mp4 ' ,
2015-07-21 02:48:50 +09:00
' upload_date ' : ' 20150501 ' , # According to '<meta itemprop="datePublished"', but in other places it's 20150510
2015-06-10 15:47:02 +09:00
' uploader ' : ' Airtek ' ,
' description ' : ' Retransmisión en directo de la XVIII media maratón de Zaragoza. ' ,
' uploader_id ' : ' UCzTzUmjXxxacNnL8I3m4LnQ ' ,
' title ' : ' Retransmisión XVIII Media maratón Zaragoza 2015 ' ,
} ,
' params ' : {
' youtube_include_dash_manifest ' : True ,
' format ' : ' 135 ' , # bestvideo
2016-06-25 00:47:19 +09:00
} ,
' skip ' : ' This live event has ended. ' ,
2015-07-21 02:34:24 +09:00
} ,
2015-07-26 00:30:34 +09:00
{
# Multifeed videos (multiple cameras), URL is for Main Camera
2021-02-01 22:30:59 +09:00
' url ' : ' https://www.youtube.com/watch?v=jvGDaLqkpTg ' ,
2015-07-26 00:30:34 +09:00
' info_dict ' : {
2021-02-01 22:30:59 +09:00
' id ' : ' jvGDaLqkpTg ' ,
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
2015-07-26 00:30:34 +09:00
} ,
' playlist ' : [ {
' info_dict ' : {
2021-02-01 22:30:59 +09:00
' id ' : ' jvGDaLqkpTg ' ,
2015-07-26 00:30:34 +09:00
' ext ' : ' mp4 ' ,
2021-02-01 22:30:59 +09:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Main Camera) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10643 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-26 00:30:34 +09:00
} ,
} , {
' info_dict ' : {
2021-02-01 22:30:59 +09:00
' id ' : ' 3AKt1R1aDnw ' ,
2015-07-26 00:30:34 +09:00
' ext ' : ' mp4 ' ,
2021-02-01 22:30:59 +09:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 2) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10991 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-26 00:30:34 +09:00
} ,
} , {
' info_dict ' : {
2021-02-01 22:30:59 +09:00
' id ' : ' RtAMM00gpVc ' ,
2015-07-26 00:30:34 +09:00
' ext ' : ' mp4 ' ,
2021-02-01 22:30:59 +09:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 3) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10995 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-26 00:30:34 +09:00
} ,
} , {
' info_dict ' : {
2021-02-01 22:30:59 +09:00
' id ' : ' 6N2fdlP3C5U ' ,
2015-07-26 00:30:34 +09:00
' ext ' : ' mp4 ' ,
2021-02-01 22:30:59 +09:00
' title ' : ' Tom Clancy Free Weekend Rainbow Whatever (Camera 4) ' ,
' description ' : ' md5:e03b909557865076822aa169218d6a5d ' ,
' duration ' : 10990 ,
' upload_date ' : ' 20161111 ' ,
' uploader ' : ' Team PGP ' ,
' uploader_id ' : ' UChORY56LMMETTuGjXaJXvLg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UChORY56LMMETTuGjXaJXvLg ' ,
2015-07-26 00:30:34 +09:00
} ,
} ] ,
' params ' : {
' skip_download ' : True ,
} ,
2023-02-24 11:48:37 +09:00
' skip ' : ' Not multifeed any more ' ,
2015-08-17 05:04:13 +09:00
} ,
2016-02-13 08:18:58 +09:00
{
2019-03-09 21:14:41 +09:00
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
2016-02-13 08:18:58 +09:00
' url ' : ' https://www.youtube.com/watch?v=gVfLd0zydlo ' ,
' info_dict ' : {
' id ' : ' gVfLd0zydlo ' ,
' title ' : ' DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30 ' ,
} ,
' playlist_count ' : 2 ,
2023-02-24 11:48:37 +09:00
' skip ' : ' Not multifeed any more ' ,
2016-02-13 08:18:58 +09:00
} ,
2015-08-17 05:04:13 +09:00
{
2016-09-17 23:48:20 +09:00
' url ' : ' https://vid.plus/FlRa-iH7PGw ' ,
2015-08-17 05:04:13 +09:00
' only_matching ' : True ,
2015-11-22 21:49:33 +09:00
} ,
2016-04-04 05:26:20 +09:00
{
2016-09-17 23:48:20 +09:00
' url ' : ' https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html ' ,
2016-04-04 05:26:20 +09:00
' only_matching ' : True ,
} ,
2015-11-22 21:49:33 +09:00
{
2019-03-09 21:14:41 +09:00
# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2016-01-19 02:19:38 +09:00
# Also tests cut-off URL expansion in video description (see
2019-03-09 21:14:41 +09:00
# https://github.com/ytdl-org/youtube-dl/issues/1892,
# https://github.com/ytdl-org/youtube-dl/issues/8164)
2015-11-22 21:49:33 +09:00
' url ' : ' https://www.youtube.com/watch?v=lsguqyKfVQg ' ,
' info_dict ' : {
' id ' : ' lsguqyKfVQg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' { dark walk}; Loki/AC/Dishonored; collab w/Elflover21 ' ,
2023-02-09 03:16:51 +09:00
' alt_title ' : ' Dark Walk ' ,
2015-11-22 21:49:33 +09:00
' description ' : ' md5:8085699c11dc3f597ce0410b0dcbb34a ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 133 ,
2015-11-22 21:49:33 +09:00
' upload_date ' : ' 20151119 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @IronSoulElf ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@IronSoulElf ' ,
2015-11-22 21:49:33 +09:00
' uploader ' : ' IronSoulElf ' ,
2023-02-09 03:16:51 +09:00
' creator ' : r ' re:Todd Haberman[;,] \ s+Daniel Law Heath and Aaron Kaplan ' ,
' track ' : ' Dark Walk ' ,
' artist ' : r ' re:Todd Haberman[;,] \ s+Daniel Law Heath and Aaron Kaplan ' ,
2019-04-29 01:37:46 +09:00
' album ' : ' Position Music - Production Music Vol. 143 - Dark Walk ' ,
2015-11-22 21:49:33 +09:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2015-11-24 00:02:37 +09:00
{
2019-03-09 21:14:41 +09:00
# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2015-11-24 00:02:37 +09:00
' url ' : ' https://www.youtube.com/watch?v=Ms7iBXnlUO8 ' ,
' only_matching ' : True ,
} ,
2015-11-28 09:07:07 +09:00
{
# Video with yt:stretch=17:0
' url ' : ' https://www.youtube.com/watch?v=Q39EVAstoRM ' ,
' info_dict ' : {
' id ' : ' Q39EVAstoRM ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Clash Of Clans#14 Dicas De Ataque Para CV 4 ' ,
' description ' : ' md5:ee18a25c350637c8faff806845bddee9 ' ,
' upload_date ' : ' 20151107 ' ,
' uploader_id ' : ' UCCr7TALkRbo3EtFzETQF1LA ' ,
' uploader ' : ' CH GAMER DROID ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2016-06-25 00:47:19 +09:00
' skip ' : ' This video does not exist. ' ,
2015-11-28 09:07:07 +09:00
} ,
2021-04-17 04:27:54 +09:00
{
# Video with incomplete 'yt:stretch=16:'
' url ' : ' https://www.youtube.com/watch?v=FRhJzUSJbGI ' ,
' only_matching ' : True ,
} ,
2016-03-03 02:07:25 +09:00
{
# Video licensed under Creative Commons
' url ' : ' https://www.youtube.com/watch?v=M4gD1WSo5mA ' ,
' info_dict ' : {
' id ' : ' M4gD1WSo5mA ' ,
' ext ' : ' mp4 ' ,
' title ' : ' md5:e41008789470fc2533a3252216f1c1d1 ' ,
' description ' : ' md5:a677553cf0840649b731a3024aeff4cc ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 721 ,
2016-03-03 02:07:25 +09:00
' upload_date ' : ' 20150127 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @BKCHarvard ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@BKCHarvard ' ,
2017-01-26 23:43:14 +09:00
' uploader ' : ' The Berkman Klein Center for Internet & Society ' ,
2016-03-03 02:07:25 +09:00
' license ' : ' Creative Commons Attribution license (reuse allowed) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2016-03-03 02:49:10 +09:00
{
# Channel-like uploader_url
' url ' : ' https://www.youtube.com/watch?v=eQcmzGIKrzg ' ,
' info_dict ' : {
' id ' : ' eQcmzGIKrzg ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Democratic Socialism and Foreign Policy | Bernie Sanders ' ,
2021-02-01 22:30:59 +09:00
' description ' : ' md5:13a2503d7b5904ef4b223aa101628f39 ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 4060 ,
2016-03-03 02:49:10 +09:00
' upload_date ' : ' 20151119 ' ,
2018-06-03 04:23:45 +09:00
' uploader ' : ' Bernie Sanders ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @BernieSanders ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@BernieSanders ' ,
2016-03-03 02:49:10 +09:00
' license ' : ' Creative Commons Attribution license (reuse allowed) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2015-11-30 00:01:59 +09:00
{
' url ' : ' https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY ' ,
' only_matching ' : True ,
2016-07-12 04:10:35 +09:00
} ,
{
2019-03-09 21:14:41 +09:00
# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2016-07-12 04:10:35 +09:00
' url ' : ' https://www.youtube.com/watch?v=i1Ko8UG-Tdo ' ,
' only_matching ' : True ,
2016-09-03 03:17:15 +09:00
} ,
{
# Rental video preview
' url ' : ' https://www.youtube.com/watch?v=yYr8q0y5Jfg ' ,
' info_dict ' : {
' id ' : ' uGpuVWrhIzE ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Piku - Trailer ' ,
' description ' : ' md5:c36bd60c3fd6f1954086c083c72092eb ' ,
' upload_date ' : ' 20150811 ' ,
' uploader ' : ' FlixMatrix ' ,
' uploader_id ' : ' FlixMatrixKaravan ' ,
2017-01-02 21:08:07 +09:00
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/FlixMatrixKaravan ' ,
2016-09-03 03:17:15 +09:00
' license ' : ' Standard YouTube License ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2018-06-03 04:23:45 +09:00
' skip ' : ' This video is not available. ' ,
2017-01-10 00:30:46 +09:00
} ,
2017-01-21 20:10:32 +09:00
{
# YouTube Red video with episode data
' url ' : ' https://www.youtube.com/watch?v=iqKdEhx-dD4 ' ,
' info_dict ' : {
' id ' : ' iqKdEhx-dD4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Isolation - Mind Field (Ep 1) ' ,
2021-02-01 22:30:59 +09:00
' description ' : ' md5:f540112edec5d09fc8cc752d3d4ba3cd ' ,
2017-01-26 23:43:14 +09:00
' duration ' : 2085 ,
2017-01-21 20:10:32 +09:00
' upload_date ' : ' 20170118 ' ,
' uploader ' : ' Vsauce ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @Vsauce ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@Vsauce ' ,
2017-01-21 20:10:32 +09:00
' series ' : ' Mind Field ' ,
' season_number ' : 1 ,
' episode_number ' : 1 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' expected_warnings ' : [
' Skipping DASH manifest ' ,
] ,
} ,
2017-08-26 17:38:38 +09:00
{
# The following content has been identified by the YouTube community
# as inappropriate or offensive to some audiences.
' url ' : ' https://www.youtube.com/watch?v=6SJNVb0GnPI ' ,
' info_dict ' : {
' id ' : ' 6SJNVb0GnPI ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Race Differences in Intelligence ' ,
' description ' : ' md5:5d161533167390427a1f8ee89a1fc6f1 ' ,
' duration ' : 965 ,
' upload_date ' : ' 20140124 ' ,
' uploader ' : ' New Century Foundation ' ,
' uploader_id ' : ' UCEJYpZGqgUob0zVVEaLhvVg ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCEJYpZGqgUob0zVVEaLhvVg ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-02-01 22:30:59 +09:00
' skip ' : ' This video has been removed for violating YouTube \' s policy on hate speech. ' ,
2017-08-26 17:38:38 +09:00
} ,
2017-01-10 00:30:46 +09:00
{
# itag 212
' url ' : ' 1t24XAntNCY ' ,
' only_matching ' : True ,
2017-02-26 18:51:21 +09:00
} ,
{
# geo restricted to JP
' url ' : ' sJL6WA-aGkQ ' ,
' only_matching ' : True ,
} ,
2018-09-24 02:14:49 +09:00
{
' url ' : ' https://invidio.us/watch?v=BaW_jenozKc ' ,
' only_matching ' : True ,
2021-02-18 06:29:32 +09:00
} ,
{
' url ' : ' https://redirect.invidious.io/watch?v=BaW_jenozKc ' ,
' only_matching ' : True ,
} ,
{
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
' url ' : ' https://redirect.invidious.io/Yh0AhrY9GjA ' ,
' only_matching ' : True ,
2018-09-24 02:14:49 +09:00
} ,
2018-12-26 17:30:48 +09:00
{
# DRM protected
' url ' : ' https://www.youtube.com/watch?v=s7_qI6_mIXc ' ,
' only_matching ' : True ,
2019-01-16 04:18:27 +09:00
} ,
{
# Video with unsupported adaptive stream type formats
' url ' : ' https://www.youtube.com/watch?v=Z4Vy8R84T1U ' ,
' info_dict ' : {
' id ' : ' Z4Vy8R84T1U ' ,
' ext ' : ' mp4 ' ,
' title ' : ' saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta ' ,
' description ' : ' md5:d41d8cd98f00b204e9800998ecf8427e ' ,
' duration ' : 433 ,
' upload_date ' : ' 20130923 ' ,
' uploader ' : ' Amelia Putri Harwita ' ,
' uploader_id ' : ' UCpOxM49HJxmC1qCalXyB3_Q ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCpOxM49HJxmC1qCalXyB3_Q ' ,
' formats ' : ' maxcount:10 ' ,
} ,
' params ' : {
' skip_download ' : True ,
' youtube_include_dash_manifest ' : False ,
} ,
2023-02-24 11:48:37 +09:00
' skip ' : ' not actual any more ' ,
2019-04-22 13:26:48 +09:00
} ,
{
2019-04-27 17:16:17 +09:00
# YouTube Music auto-generated description
2019-04-22 13:26:48 +09:00
' url ' : ' https://music.youtube.com/watch?v=MgNrAu2pzNs ' ,
' info_dict ' : {
' id ' : ' MgNrAu2pzNs ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Voyeur Girl ' ,
' description ' : ' md5:7ae382a65843d6df2685993e90a8628f ' ,
' upload_date ' : ' 20190312 ' ,
2020-03-06 02:05:50 +09:00
' uploader ' : ' Stephen - Topic ' ,
' uploader_id ' : ' UC-pWHpBjdGG69N9mM2auIAA ' ,
2019-04-22 13:26:48 +09:00
' artist ' : ' Stephen ' ,
' track ' : ' Voyeur Girl ' ,
' album ' : ' it \' s too much love to know my dear ' ,
' release_date ' : ' 20190313 ' ,
' release_year ' : 2019 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2019-12-01 01:51:34 +09:00
{
' url ' : ' https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q ' ,
' only_matching ' : True ,
} ,
2020-05-01 02:40:38 +09:00
{
# invalid -> valid video id redirection
' url ' : ' DJztXj2GPfl ' ,
' info_dict ' : {
' id ' : ' DJztXj2GPfk ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack) ' ,
' description ' : ' md5:bf577a41da97918e94fa9798d9228825 ' ,
' upload_date ' : ' 20090125 ' ,
' uploader ' : ' Prochorowka ' ,
' uploader_id ' : ' Prochorowka ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/Prochorowka ' ,
' artist ' : ' Panjabi MC ' ,
' track ' : ' Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix ' ,
' album ' : ' Beware of the Boys (Mundian To Bach Ke) ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
2021-02-01 22:30:59 +09:00
' skip ' : ' Video unavailable ' ,
2020-09-13 23:23:21 +09:00
} ,
{
# empty description results in an empty string
' url ' : ' https://www.youtube.com/watch?v=x41yOUIvK2k ' ,
' info_dict ' : {
' id ' : ' x41yOUIvK2k ' ,
' ext ' : ' mp4 ' ,
' title ' : ' IMG 3456 ' ,
' description ' : ' ' ,
' upload_date ' : ' 20170613 ' ,
2024-01-16 03:38:43 +09:00
' uploader ' : " l ' Or Vert asbl " ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @ElevageOrVert ' ,
2020-09-13 23:23:21 +09:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2020-11-21 01:21:52 +09:00
{
2020-11-28 17:02:31 +09:00
# with '};' inside yt initial data (see [1])
# see [2] for an example with '};' inside ytInitialPlayerResponse
# 1. https://github.com/ytdl-org/youtube-dl/issues/27093
# 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2020-11-21 01:21:52 +09:00
' url ' : ' https://www.youtube.com/watch?v=CHqg6qOn4no ' ,
' info_dict ' : {
' id ' : ' CHqg6qOn4no ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Part 77 Sort a list of simple types in c# ' ,
' description ' : ' md5:b8746fa52e10cdbf47997903f13b20dc ' ,
' upload_date ' : ' 20130831 ' ,
' uploader ' : ' kudvenkat ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @Csharp-video-tutorialsBlogspot ' ,
2020-11-21 01:21:52 +09:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2020-12-29 04:29:34 +09:00
{
# another example of '};' in ytInitialData
' url ' : ' https://www.youtube.com/watch?v=gVfgbahppCY ' ,
' only_matching ' : True ,
} ,
2020-12-29 04:19:43 +09:00
{
' url ' : ' https://www.youtube.com/watch_popup?v=63RmMXCd_bQ ' ,
' only_matching ' : True ,
} ,
2021-02-07 04:17:03 +09:00
{
# https://github.com/ytdl-org/youtube-dl/pull/28094
' url ' : ' OtqTfy26tG0 ' ,
' info_dict ' : {
' id ' : ' OtqTfy26tG0 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Burn Out ' ,
' description ' : ' md5:8d07b84dcbcbfb34bc12a56d968b6131 ' ,
' upload_date ' : ' 20141120 ' ,
' uploader ' : ' The Cinematic Orchestra - Topic ' ,
' uploader_id ' : ' UCIzsJBIyo8hhpFm1NK0uLgw ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCIzsJBIyo8hhpFm1NK0uLgw ' ,
' artist ' : ' The Cinematic Orchestra ' ,
' track ' : ' Burn Out ' ,
' album ' : ' Every Day ' ,
' release_data ' : None ,
' release_year ' : None ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
2021-02-16 07:57:53 +09:00
{
# controversial video, only works with bpctr when authenticated with cookies
' url ' : ' https://www.youtube.com/watch?v=nGC3D_FkCmg ' ,
' only_matching ' : True ,
} ,
2021-04-07 05:34:43 +09:00
{
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
' url ' : ' cBvYw8_A0vQ ' ,
' info_dict ' : {
' id ' : ' cBvYw8_A0vQ ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 4K Ueno Okachimachi Street Scenes 上野御徒町歩き ' ,
' description ' : ' md5:ea770e474b7cd6722b4c95b833c03630 ' ,
' upload_date ' : ' 20201120 ' ,
' uploader ' : ' Walk around Japan ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @walkaroundjapan7124 ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@walkaroundjapan7124 ' ,
2021-04-07 05:34:43 +09:00
} ,
' params ' : {
' skip_download ' : True ,
2022-01-31 09:02:56 +09:00
} ,
} ,
{
# YT 'Shorts'
' url ' : ' https://youtube.com/shorts/4L2J27mJ3Dc ' ,
' info_dict ' : {
' id ' : ' 4L2J27mJ3Dc ' ,
' ext ' : ' mp4 ' ,
2023-02-24 11:48:37 +09:00
' title ' : ' Midwest Squid Game #Shorts ' ,
' description ' : ' md5:976512b8a29269b93bbd8a61edc45a6d ' ,
2022-01-31 09:02:56 +09:00
' upload_date ' : ' 20211025 ' ,
' uploader ' : ' Charlie Berens ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @CharlieBerens ' ,
2022-01-31 09:02:56 +09:00
} ,
' params ' : {
' skip_download ' : True ,
2021-04-07 05:34:43 +09:00
} ,
} ,
2013-06-28 02:13:11 +09:00
]
2021-02-02 00:36:19 +09:00
_formats = {
' 5 ' : { ' ext ' : ' flv ' , ' width ' : 400 , ' height ' : 240 , ' acodec ' : ' mp3 ' , ' abr ' : 64 , ' vcodec ' : ' h263 ' } ,
' 6 ' : { ' ext ' : ' flv ' , ' width ' : 450 , ' height ' : 270 , ' acodec ' : ' mp3 ' , ' abr ' : 64 , ' vcodec ' : ' h263 ' } ,
' 13 ' : { ' ext ' : ' 3gp ' , ' acodec ' : ' aac ' , ' vcodec ' : ' mp4v ' } ,
' 17 ' : { ' ext ' : ' 3gp ' , ' width ' : 176 , ' height ' : 144 , ' acodec ' : ' aac ' , ' abr ' : 24 , ' vcodec ' : ' mp4v ' } ,
' 18 ' : { ' ext ' : ' mp4 ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' aac ' , ' abr ' : 96 , ' vcodec ' : ' h264 ' } ,
' 22 ' : { ' ext ' : ' mp4 ' , ' width ' : 1280 , ' height ' : 720 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 34 ' : { ' ext ' : ' flv ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
' 35 ' : { ' ext ' : ' flv ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
' 36 ' : { ' ext ' : ' 3gp ' , ' width ' : 320 , ' acodec ' : ' aac ' , ' vcodec ' : ' mp4v ' } ,
' 37 ' : { ' ext ' : ' mp4 ' , ' width ' : 1920 , ' height ' : 1080 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 38 ' : { ' ext ' : ' mp4 ' , ' width ' : 4096 , ' height ' : 3072 , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' } ,
' 43 ' : { ' ext ' : ' webm ' , ' width ' : 640 , ' height ' : 360 , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' } ,
' 44 ' : { ' ext ' : ' webm ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' } ,
' 45 ' : { ' ext ' : ' webm ' , ' width ' : 1280 , ' height ' : 720 , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' } ,
' 46 ' : { ' ext ' : ' webm ' , ' width ' : 1920 , ' height ' : 1080 , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' } ,
' 59 ' : { ' ext ' : ' mp4 ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
' 78 ' : { ' ext ' : ' mp4 ' , ' width ' : 854 , ' height ' : 480 , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' } ,
# 3D videos
' 82 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 83 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 84 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 85 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' 3D ' , ' acodec ' : ' aac ' , ' abr ' : 192 , ' vcodec ' : ' h264 ' , ' preference ' : - 20 } ,
' 100 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 128 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
' 101 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
' 102 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' 3D ' , ' acodec ' : ' vorbis ' , ' abr ' : 192 , ' vcodec ' : ' vp8 ' , ' preference ' : - 20 } ,
# Apple HTTP Live Streaming
' 91 ' : { ' ext ' : ' mp4 ' , ' height ' : 144 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 92 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 93 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 94 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 95 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 96 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 132 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
' 151 ' : { ' ext ' : ' mp4 ' , ' height ' : 72 , ' format_note ' : ' HLS ' , ' acodec ' : ' aac ' , ' abr ' : 24 , ' vcodec ' : ' h264 ' , ' preference ' : - 10 } ,
# DASH mp4 video
' 133 ' : { ' ext ' : ' mp4 ' , ' height ' : 240 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 134 ' : { ' ext ' : ' mp4 ' , ' height ' : 360 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 135 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 136 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 137 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 138 ' : { ' ext ' : ' mp4 ' , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } , # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
' 160 ' : { ' ext ' : ' mp4 ' , ' height ' : 144 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 212 ' : { ' ext ' : ' mp4 ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 264 ' : { ' ext ' : ' mp4 ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
' 298 ' : { ' ext ' : ' mp4 ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' , ' fps ' : 60 } ,
' 299 ' : { ' ext ' : ' mp4 ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' , ' fps ' : 60 } ,
' 266 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' h264 ' } ,
# Dash mp4 audio
' 139 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 48 , ' container ' : ' m4a_dash ' } ,
' 140 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 128 , ' container ' : ' m4a_dash ' } ,
' 141 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' abr ' : 256 , ' container ' : ' m4a_dash ' } ,
' 256 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' container ' : ' m4a_dash ' } ,
' 258 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' aac ' , ' container ' : ' m4a_dash ' } ,
' 325 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' dtse ' , ' container ' : ' m4a_dash ' } ,
' 328 ' : { ' ext ' : ' m4a ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' ec-3 ' , ' container ' : ' m4a_dash ' } ,
# Dash webm
' 167 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' width ' : 640 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 168 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 169 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' width ' : 1280 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 170 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' width ' : 1920 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 218 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 219 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' width ' : 854 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp8 ' } ,
' 278 ' : { ' ext ' : ' webm ' , ' height ' : 144 , ' format_note ' : ' DASH video ' , ' container ' : ' webm ' , ' vcodec ' : ' vp9 ' } ,
' 242 ' : { ' ext ' : ' webm ' , ' height ' : 240 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 243 ' : { ' ext ' : ' webm ' , ' height ' : 360 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 244 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 245 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 246 ' : { ' ext ' : ' webm ' , ' height ' : 480 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 247 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 248 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 271 ' : { ' ext ' : ' webm ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
' 272 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 302 ' : { ' ext ' : ' webm ' , ' height ' : 720 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 303 ' : { ' ext ' : ' webm ' , ' height ' : 1080 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 308 ' : { ' ext ' : ' webm ' , ' height ' : 1440 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
' 313 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' } ,
' 315 ' : { ' ext ' : ' webm ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' vp9 ' , ' fps ' : 60 } ,
# Dash webm audio
' 171 ' : { ' ext ' : ' webm ' , ' acodec ' : ' vorbis ' , ' format_note ' : ' DASH audio ' , ' abr ' : 128 } ,
' 172 ' : { ' ext ' : ' webm ' , ' acodec ' : ' vorbis ' , ' format_note ' : ' DASH audio ' , ' abr ' : 256 } ,
# Dash webm audio with opus inside
' 249 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 50 } ,
' 250 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 70 } ,
' 251 ' : { ' ext ' : ' webm ' , ' format_note ' : ' DASH audio ' , ' acodec ' : ' opus ' , ' abr ' : 160 } ,
# RTMP (unnamed)
' _rtmp ' : { ' protocol ' : ' rtmp ' } ,
# av01 video only formats sometimes served with "unknown" codecs
' 394 ' : { ' acodec ' : ' none ' , ' vcodec ' : ' av01.0.05M.08 ' } ,
' 395 ' : { ' acodec ' : ' none ' , ' vcodec ' : ' av01.0.05M.08 ' } ,
' 396 ' : { ' acodec ' : ' none ' , ' vcodec ' : ' av01.0.05M.08 ' } ,
' 397 ' : { ' acodec ' : ' none ' , ' vcodec ' : ' av01.0.05M.08 ' } ,
}
2013-06-28 02:13:11 +09:00
2021-04-17 02:07:32 +09:00
@classmethod
def suitable(cls, url):
    """Decline URLs with a non-empty `list` query value; otherwise defer to the base class."""
    list_value = parse_qs(url).get('list', [None])[0]
    return False if list_value else super(YoutubeIE, cls).suitable(url)
2013-09-21 21:19:30 +09:00
def __init__(self, *args, **kwargs):
    """Run the base-class initialiser, then create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache backs _cached(); _code_cache backs _load_player()
    self._player_cache, self._code_cache = {}, {}
2013-09-21 21:19:30 +09:00
2024-01-16 03:34:21 +09:00
# *ytcfgs, webpage=None
def _extract_player_url(self, *ytcfgs, **kw_webpage):
    """Return the absolute player JS URL found in the ytcfgs or the webpage.

    Any number of ytcfg dicts may be passed positionally, plus an optional
    `webpage` keyword. A non-dict first positional argument is used as the
    webpage when no `webpage` keyword is given. The result is whatever
    traverse_obj() yields for the lookup below.
    """
    # Bind `webpage` unconditionally. It used to be assigned only when the
    # first positional argument was not a dict, so a call such as
    # _extract_player_url(ytcfg, webpage=html) did not reliably see the
    # keyword value.
    webpage = kw_webpage.get('webpage')
    if not webpage and ytcfgs and not isinstance(ytcfgs[0], dict):
        webpage = ytcfgs[0]
    if webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
        if player_url:
            # appended last, so values from real ytcfgs are found first
            ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
    return traverse_obj(
        ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
        get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
def _download_player_url(self, video_id, fatal=False):
    """Build the base.js URL for the player version named in the iframe API JS.

    Returns None when no player version was found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    # without any downloaded text, let the search quietly default to None
    fallback = NO_DEFAULT if iframe_js else None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js or '', 'player version',
        fatal=fatal, default=fallback)
    if not version:
        return None
    return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(version)
2014-08-02 19:21:53 +09:00
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts, joined with dots."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
2014-08-02 19:21:53 +09:00
2020-05-02 09:18:08 +09:00
@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first _PLAYER_INFO_RE pattern found in player_url.

    Raises ExtractorError when none of the patterns matches.
    """
    # lazy: stops searching at the first pattern that matches
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((m for m in candidates if m), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2020-05-02 09:18:08 +09:00
2024-01-16 03:34:21 +09:00
def _load_player(self, video_id, player_url, fatal=True, player_id=None):
    """Return the player JS for player_url, keeping one copy per player id.

    The code is downloaded only when the player id is not yet in
    self._code_cache. With `fatal` set the result is an indexing lookup in
    the cache; otherwise dict.get() is used, giving None when nothing was
    stored.
    """
    player_id = player_id or self._extract_player_info(player_url)
    store = self._code_cache
    if player_id not in store:
        downloaded = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        if downloaded:
            store[player_id] = downloaded
    if fatal:
        return store[player_id]
    return store.get(player_id)
Implement n-param descrambling using JSInterp
Fixes #29326, closes #29790, closes #30004, closes #30024, closes #30052,
closes #30088, closes #30097, closes #30102, closes #30109, closes #30119,
closes #30125, closes #30128, closes #30162, closes #30173, closes #30186,
closes #30192, closes #30221, closes #30239, closes #30539, closes #30552.
2021-11-01 13:45:42 +09:00
2020-05-02 09:18:08 +09:00
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature string for this player.

    The rearrangement is a list of source positions. It is loaded from the
    'youtube-sigfuncs' cache when available; otherwise it is derived by
    running the player's signature function (found by _parse_sig_js) on a
    probe string and then stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_{0}_{1}'.format(
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    self.write_debug('Extracting signature function {0}'.format(func_id))
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        # player_id has to go by keyword: the third positional parameter of
        # _load_player is `fatal`, so passing it positionally set `fatal`
        # instead and left player_id to be extracted a second time
        code = self._load_player(video_id, player_url, player_id=player_id)
        if code:
            res = self._parse_sig_js(code)
            # probe whose character codes are 0..len-1, so the codes of the
            # output characters are the input positions they came from
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in res(test_string)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)
2013-09-21 22:19:48 +09:00
2014-08-02 19:21:53 +09:00
def _print_sig_code ( self , func , example_sig ) :
# Debug aid: when the youtube_print_sig_code param is set, print Python source
# that reproduces what `func` does to a signature shaped like `example_sig`.
# Returns None immediately when the param is not set.
2024-01-16 03:34:21 +09:00
if not self . get_param ( ' youtube_print_sig_code ' ) :
return
2013-09-22 17:30:02 +09:00
def gen_sig_code ( idxs ) :
# Yields index/slice expressions over `s` for the positions in idxs;
# neighbouring positions that differ by +1 or -1 are merged into one slice.
# NOTE(review): `i` is bound only by the loop below, so idxs with fewer
# than two entries looks like it would fail at the final yield - confirm.
def _genslice ( start , end , step ) :
# Formats a single slice expression; the end bound is left out when
# end + step would be negative, and the step is left out when it is 1.
2014-09-13 14:51:06 +09:00
starts = ' ' if start == 0 else str ( start )
2014-11-24 05:20:46 +09:00
ends = ( ' : %d ' % ( end + step ) ) if end + step > = 0 else ' : '
2014-09-24 16:51:45 +09:00
steps = ' ' if step == 1 else ( ' : %d ' % step )
2024-01-16 03:34:21 +09:00
return ' s[ {0} {1} {2} ] ' . format ( starts , ends , steps )
2013-09-22 17:30:02 +09:00
step = None
2014-12-17 08:06:41 +09:00
# Squelch pyflakes warnings - start will be set when step is set
start = ' (Never used) '
2013-09-22 17:30:02 +09:00
for i , prev in zip ( idxs [ 1 : ] , idxs [ : - 1 ] ) :
if step is not None :
# inside a run: keep extending it while the stride is unchanged
if i - prev == step :
continue
yield _genslice ( start , prev , step )
step = None
continue
if i - prev in [ - 1 , 1 ] :
# a new run starts at `prev`
step = i - prev
start = prev
continue
else :
2014-09-13 14:51:06 +09:00
yield ' s[ %d ] ' % prev
2013-09-22 17:30:02 +09:00
# emit whatever is still pending after the last pair
if step is None :
2014-09-13 14:51:06 +09:00
yield ' s[ %d ] ' % i
2013-09-22 17:30:02 +09:00
else :
yield _genslice ( start , i , step )
2014-09-13 14:51:06 +09:00
# Probe func with one character per position (built with compat_chr from
# 0..len-1); ord() of each output character then gives a source position.
test_string = ' ' . join ( map ( compat_chr , range ( len ( example_sig ) ) ) )
2013-09-22 19:18:16 +09:00
cache_res = func ( test_string )
2013-09-22 17:30:02 +09:00
cache_spec = [ ord ( c ) for c in cache_res ]
2014-09-13 14:51:06 +09:00
expr_code = ' + ' . join ( gen_sig_code ( cache_spec ) )
2014-08-02 19:21:53 +09:00
# tuple of part lengths that the generated `if` compares against
signature_id_tuple = ' ( %s ) ' % (
' , ' . join ( compat_str ( len ( p ) ) for p in example_sig . split ( ' . ' ) ) )
2014-09-24 16:51:45 +09:00
code = ( ' if tuple(len(p) for p in s.split( \' . \' )) == %s : \n '
2014-09-13 14:51:06 +09:00
' return %s \n ' ) % ( signature_id_tuple , expr_code )
2014-09-24 16:51:45 +09:00
self . to_screen ( ' Extracted signature function: \n ' + code )
2013-09-22 17:30:02 +09:00
2013-09-21 21:19:30 +09:00
def _parse_sig_js ( self , jscode ) :
# Finds the name of the signature function in the player JS `jscode` and
# returns a callable that takes one string and passes it, as a one-element
# argument list, to that function as extracted by JSInterpreter.
# The patterns are handed to _search_regex as one tuple, grouped below as
# current, old and obsolete; each captures the function name as group `sig`.
2024-12-07 12:39:44 +09:00
# Examples where `sig` is funcname:
# sig=function(a){a=a.split(""); ... ;return a.join("")};
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
# sig=function(J){J=J.split(""); ... ;return J.join("")};
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
2013-09-21 21:19:30 +09:00
funcname = self . _search_regex (
2024-12-13 21:16:31 +09:00
( r ' \ b(?P<var>[ \ w$]+)&& \ ((?P=var)=(?P<sig>[ \ w$] { 2,}) \ (decodeURIComponent \ ((?P=var) \ ) \ ) ' ,
r ' (?P<sig>[ \ w$]+) \ s*= \ s*function \ ( \ s*(?P<arg>[ \ w$]+) \ s* \ ) \ s* { \ s*(?P=arg) \ s*= \ s*(?P=arg) \ .split \ ( \ s* " " \ s* \ ) \ s*; \ s*[^}]+; \ s*return \ s+(?P=arg) \ .join \ ( \ s* " " \ s* \ ) ' ,
r ' (?: \ b|[^ \ w$])(?P<sig>[ \ w$] { 2,}) \ s*= \ s*function \ ( \ s*a \ s* \ ) \ s* { \ s*a \ s*= \ s*a \ .split \ ( \ s* " " \ s* \ )(?:;[ \ w$] {2} \ .[ \ w$] {2} \ (a, \ d+ \ ))? ' ,
2024-12-07 12:39:44 +09:00
# Old patterns
2024-12-13 21:16:31 +09:00
r ' \ b[cs] \ s*&& \ s*[adf] \ .set \ ([^,]+ \ s*, \ s*encodeURIComponent \ s* \ ( \ s*(?P<sig>[ \ w$]+) \ ( ' ,
r ' \ b[ \ w]+ \ s*&& \ s*[ \ w]+ \ .set \ ([^,]+ \ s*, \ s*encodeURIComponent \ s* \ ( \ s*(?P<sig>[ \ w$]+) \ ( ' ,
r ' \ bm=(?P<sig>[ \ w$] { 2,}) \ (decodeURIComponent \ (h \ .s \ ) \ ) ' ,
2019-06-22 00:58:42 +09:00
# Obsolete patterns
2024-12-13 21:16:31 +09:00
r ' ( " | \' )signature \ 1 \ s*, \ s*(?P<sig>[ \ w$]+) \ ( ' ,
r ' \ .sig \ | \ |(?P<sig>[ \ w$]+) \ ( ' ,
r ' yt \ .akamaized \ .net/ \ ) \ s* \ | \ | \ s*.*? \ s*[cs] \ s*&& \ s*[adf] \ .set \ ([^,]+ \ s*, \ s*(?:encodeURIComponent \ s* \ ()? \ s*(?P<sig>[ \ w$]+) \ ( ' ,
r ' \ b[cs] \ s*&& \ s*[adf] \ .set \ ([^,]+ \ s*, \ s*(?P<sig>[ \ w$]+) \ ( ' ,
r ' \ bc \ s*&& \ s*[ \ w]+ \ .set \ ([^,]+ \ s*, \ s* \ ([^)]* \ ) \ s* \ ( \ s*(?P<sig>[ \ w$]+) \ ( ' ) ,
2017-02-01 00:19:29 +09:00
jscode , ' Initial JS player signature function name ' , group = ' sig ' )
2014-03-30 14:02:58 +09:00
2023-06-18 08:52:18 +09:00
jsi = JSInterpreter ( jscode )
2014-03-30 14:02:58 +09:00
initial_function = jsi . extract_function ( funcname )
2013-09-21 21:19:30 +09:00
# the extracted function takes its arguments as a list
return lambda s : initial_function ( [ s ] )
2024-01-16 03:34:21 +09:00
def _cached(self, func, *cache_id):
    """Wrap func so that its first outcome for cache_id is memoised.

    The outcome is kept in self._player_cache under the cache_id tuple; the
    arguments of the wrapped call are not part of the key. Exceptions are
    memoised as well and raised again on every later call; anything that is
    not an ExtractorError is wrapped in one carrying the traceback text.
    """
    def inner(*args, **kwargs):
        store = self._player_cache
        if cache_id not in store:
            try:
                outcome = func(*args, **kwargs)
            except ExtractorError as err:
                outcome = err
            except Exception as err:
                outcome = ExtractorError(traceback.format_exc(), cause=err)
            store[cache_id] = outcome
        result = store[cache_id]
        if isinstance(result, Exception):
            raise result
        return result

    return inner
2021-02-01 22:30:59 +09:00
def _decrypt_signature(self, s, video_id, player_url):
    """Apply the player's signature function to the encrypted `s` value and return the result.

    The function is obtained through _cached(), keyed by the player URL and
    the shape of `s`.
    """
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    sig_func = self._cached(self._extract_signature_function, *cache_key)(
        video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
Implement n-param descrambling using JSInterp
Fixes #29326, closes #29790, closes #30004, closes #30024, closes #30052,
closes #30088, closes #30097, closes #30102, closes #30109, closes #30119,
closes #30125, closes #30128, closes #30162, closes #30173, closes #30186,
closes #30192, closes #30221, closes #30239, closes #30539, closes #30552.
2021-11-01 13:45:42 +09:00
# from yt-dlp
# See also:
# 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
2024-01-16 03:34:21 +09:00
def _decrypt_nsig ( self , n , video_id , player_url ) :
""" Turn the encrypted n field into a working signature """
# Raises ExtractorError when player_url is None or when the n function code
# cannot be extracted. Returns the transformed value, or None (after a
# warning) when running the function raises JSInterpreter.Exception.
if player_url is None :
raise ExtractorError ( ' Cannot decrypt nsig without player_url ' )
try :
jsi , player_id , func_code = self . _extract_n_function_code ( video_id , player_url )
except ExtractorError as e :
2024-07-11 02:20:59 +09:00
raise ExtractorError ( ' Unable to extract nsig function code ' , cause = e )
2024-01-16 03:34:21 +09:00
# optional debug output: prints element 1 of func_code
if self . get_param ( ' youtube_print_sig_code ' ) :
self . to_screen ( ' Extracted nsig function from {0} : \n {1} \n ' . format (
player_id , func_code [ 1 ] ) )
try :
# the built function is memoised per player_url via _cached()
extract_nsig = self . _cached ( self . _extract_n_function_from_code , ' nsig func ' , player_url )
ret = extract_nsig ( jsi , func_code ) ( n )
except JSInterpreter . Exception as e :
# best effort: warn and fall through to a None result instead of raising
self . report_warning (
' %s ( %s %s ) ' % (
2024-07-24 22:33:34 +09:00
' Unable to decode n-parameter: expect download to be blocked or throttled ' ,
2024-01-16 03:34:21 +09:00
error_to_compat_str ( e ) ,
2024-03-24 00:30:13 +09:00
traceback . format_exc ( ) ) ,
video_id = video_id )
2024-01-16 03:34:21 +09:00
return
self . write_debug ( ' Decrypted nsig {0} => {1} ' . format ( n , ret ) )
return ret
Implement n-param descrambling using JSInterp
Fixes #29326, closes #29790, closes #30004, closes #30024, closes #30052,
closes #30088, closes #30097, closes #30102, closes #30109, closes #30119,
closes #30125, closes #30128, closes #30162, closes #30173, closes #30186,
closes #30192, closes #30221, closes #30239, closes #30539, closes #30552.
2021-11-01 13:45:42 +09:00
def _extract_n_function_name ( self , jscode ) :
# Finds the name of the n-parameter function in the player JS `jscode`.
# Step 1: search for a call site of the shapes listed below, capturing the
#         called name (`nfunc`) and an optional array index (`idx`); both
#         default to None when nothing matches.
# Step 2: if no name was found, warn and return the result of a generic
#         search for a function definition (group `name`).
# Step 3: with a name but no index, the name itself is returned.
# Step 4: otherwise the name refers to a `var` array; it is parsed with
#         _search_json (through js_to_json) and element int(idx) is returned.
2024-01-16 03:34:21 +09:00
func_name , idx = self . _search_regex (
2024-12-13 12:09:29 +09:00
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
2024-12-07 12:39:44 +09:00
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
2024-12-13 12:09:29 +09:00
# or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
2024-08-07 04:59:09 +09:00
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
2024-12-13 12:09:29 +09:00
# old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
2024-08-07 04:59:09 +09:00
# older: (b=a.get("n"))&&(b=nfunc(b)
2024-07-11 02:20:59 +09:00
r ''' (?x)
2024-12-13 12:09:29 +09:00
# (expr, ...,
\( ( ? : ( ? : \s * [ \w $ ] + \s * = ) ? ( ? : [ \w $ " + \ . \ s( \ []+(?:[) \ ]] \ s*)?),)*
# b=...
( ? P < b > [ \w $ ] + ) \s * = \s * ( ? ! ( ? P = b ) [ ^ \w $ ] ) [ \w $ ] + \s * ( ? : ( ? :
\. \s * [ \w $ ] + |
\[ \s * [ \w $ ] + \s * \] |
\. \s * get \s * \( \s * [ \w $ " ]+ \ s* \ )
) \s * ) { , 2 } ( ? : \s * \| \| \s * null ( ? = \s * \) ) ) ? \s *
\) \s * & & \s * \( # ...)&&(
# b = nfunc, b = narray[idx]
( ? P = b ) \s * = \s * ( ? P < nfunc > [ \w $ ] + ) \s *
( ? : \[ \s * ( ? P < idx > [ \w $ ] + ) \s * \] \s * ) ?
# (...)
\( \s * [ \w $ ] + \s * \)
2024-08-02 03:18:34 +09:00
''' , jscode, ' Initial JS player n function name ' , group=( ' nfunc ' , ' idx ' ),
default = ( None , None ) )
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name :
self . report_warning ( ' Falling back to generic n function search ' )
return self . _search_regex (
r ''' (?xs)
( ? : ( ? < = [ ^ \w $ ] ) | ^ ) # instead of \b, which ignores $
( ? P < name > ( ? ! \d ) [ a - zA - Z \d_ $ ] + ) \s * = \s * function \( ( ? ! \d ) [ a - zA - Z \d_ $ ] + \)
2024-12-07 12:39:44 +09:00
\s * \{ ( ? : ( ? ! } ; ) . ) + ? ( ? :
[ " ' ]enhanced_except_ |
2024-12-13 21:16:31 +09:00
return \s * ( ? P < q > " | ' )[a-zA-Z \ d-]+_w8_(?P=q) \ s* \ + \ s*[ \ w$]+
2024-12-07 12:39:44 +09:00
)
2024-08-02 03:18:34 +09:00
''' , jscode, ' Initial JS player n function name ' , group= ' name ' )
2022-02-01 23:39:03 +09:00
# no index captured: the captured name is the function itself
if not idx :
2024-01-16 03:34:21 +09:00
return func_name
2023-06-22 16:53:31 +09:00
2024-12-07 12:39:44 +09:00
# index captured: look up that element of the array declared as `var <name> = [...]`
return self . _search_json (
r ' var \ s+ {0} \ s*= ' . format ( re . escape ( func_name ) ) , jscode ,
' Initial JS player n function list ( {0} . {1} ) ' . format ( func_name , idx ) ,
func_name , contains_pattern = r ' \ [[ \ s \ S]+ \ ] ' , end_pattern = ' [,;] ' ,
transform_source = js_to_json ) [ int ( idx ) ]
2023-06-22 16:53:31 +09:00
2024-01-16 03:34:21 +09:00
def _extract_n_function_code(self, video_id, player_url):
    """Return (JSInterpreter, player id, n function code) for the given player.

    The function code comes from the 'youtube-nsig' cache when it is there;
    otherwise it is extracted from the loaded player JS and stored in that
    cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id)
    if cached_code:
        # as before, the interpreter is constructed from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code):
    """Build a callable that runs the extracted n function on one string.

    The callable raises JSInterpreter.Exception when the call fails in any
    way, or when the result starts with 'enhanced_except_' or ends with the
    input string.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s], kwargs={'_ytdl_do_not_return': s})
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # normalise every other failure to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        rejected = result.startswith('enhanced_except_') or result.endswith(s)
        if rejected:
            raise JSInterpreter.Exception('Signature function returned an exception')
        return result

    return extract_nsig
def _unthrottle_format_urls(self, video_id, player_url, *formats):
    """Replace the `n` query parameter of each format URL, in place.

    Formats whose URL has no `n` parameter are skipped. Processing stops at
    the first `n` value for which _decrypt_nsig returns None.
    """
    for fmt in formats:
        query = compat_parse_qs(compat_urllib_parse.urlparse(fmt['url']).query)
        n_values = query.get('n')
        if not n_values:
            continue
        n_param = n_values[-1]
        # outcomes are memoised per (n value, player URL) through _cached()
        descramble = self._cached(self._decrypt_nsig, 'nsig', n_param, player_url)
        n_response = descramble(n_param, video_id, player_url)
        if n_response is None:
            # give up if descrambling failed
            break
        fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
Implement n-param descrambling using JSInterp
Fixes #29326, closes #29790, closes #30004, closes #30024, closes #30052,
closes #30088, closes #30097, closes #30102, closes #30109, closes #30119,
closes #30125, closes #30128, closes #30162, closes #30173, closes #30186,
closes #30192, closes #30221, closes #30239, closes #30539, closes #30552.
2021-11-01 13:45:42 +09:00
2022-06-21 07:15:20 +09:00
# from yt-dlp, with tweaks
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when that is a non-zero int,
    otherwise it is searched for in the player JS. Without a player_url
    this raises ExtractorError if `fatal`, else warns and returns None.
    """
    sts = traverse_obj(ytcfg, 'STS', expected_type=int)
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    code = self._load_player(video_id, player_url, fatal=fatal)
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
        'JS player signature timestamp', group='sts', fatal=fatal))
2021-02-01 22:30:59 +09:00
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response, if it has one.

    The request is non-fatal; nothing is returned.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(alphabet[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    # more consistent results setting it to right before the end
    length_values = parse_qs(playback_url).get('len') or ['1.5']
    video_length = '{0}'.format(float(length_values[0]) - 1)

    tracking_query = {
        'ver': '2',
        'cpn': cpn,
        'cmt': video_length,
        'el': 'detailpage',  # otherwise defaults to "shorts"
    }
    self._download_webpage(
        update_url_query(playback_url, tracking_query), video_id,
        'Marking watched', 'Unable to mark watched', fatal=False)
2017-09-06 02:48:37 +09:00
@staticmethod
def _extract_urls ( webpage ) :
# Collects YouTube embeds found in `webpage` and returns them as one list,
# in three passes: embedded-player markup, lazyYT attributes, and the
# WordPress "YouTube Video Importer" plugin markup.
# NOTE(review): the first pass captures URLs, while the other two capture
# the raw attribute value (apparently a video id) - confirm callers accept both.
# Embedded YouTube player
entries = [
unescapeHTML ( mobj . group ( ' url ' ) )
for mobj in re . finditer ( r ''' (?x)
( ? :
< iframe [ ^ > ] + ? src = |
data - video - url = |
< embed [ ^ > ] + ? src = |
embedSWF \( ? : \s * |
< object [ ^ > ] + data = |
new \s + SWFObject \(
)
( [ " \' ])
( ? P < url > ( ? : https ? : ) ? / / ( ? : www \. ) ? youtube ( ? : - nocookie ) ? \. com /
2017-10-28 00:26:43 +09:00
( ? : embed | v | p ) / [ 0 - 9 A - Za - z_ - ] { 11 } . * ? )
2017-09-06 02:48:37 +09:00
\1 ''' , webpage)]
# lazyYT YouTube embed
entries . extend ( list ( map (
unescapeHTML ,
re . findall ( r ' class= " lazyYT " data-youtube-id= " ([^ " ]+) " ' , webpage ) ) ) )
# Wordpress "YouTube Video Importer" plugin
matches = re . findall ( r ''' (?x)<div[^>]+
class = ( ? P < q1 > [ \' " ])[^ \' " ]* \b yvii_single_video_player \b [^ \' " ]*(?P=q1)[^>]+
data - video_id = ( ? P < q2 > [ \' " ])([^ \' " ]+)(?P=q2) ' ' ' , webpage )
# findall yields one tuple per match; its last item is the data-video_id value
entries . extend ( m [ - 1 ] for m in matches )
return entries
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by _extract_urls for webpage, or None when there is none."""
    for first in YoutubeIE._extract_urls(webpage):
        return first
    return None
2014-02-09 03:20:11 +09:00
@classmethod
def extract_id(cls, url):
    """Return group 2 of _VALID_URL matched (verbose mode) against url.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2021-02-02 00:08:50 +09:00
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts (start_time, end_time, title) from `data`.

    Returns None when `data` has no non-empty chapter list. A chapter ends
    where the next listed chapter starts; the last one ends at `duration`.
    Chapters with an unknown start or end time are left out.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def start_of(chapter):
        # timeRangeStartMillis divided by 1000; None when missing or not an int
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = start_of(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = start_of(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
2021-02-01 22:30:59 +09:00
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for the JSON matched by regex and return it parsed.

    The regex is tried first with _YT_INITIAL_BOUNDARY_RE appended, then on
    its own; when neither matches, '{}' is parsed instead. Parsing is
    non-fatal.
    """
    bounded = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_text = self._search_regex(
        (bounded, regex), webpage, name, default='{}')
    return self._parse_json(json_text, video_id, fatal=False)
2020-06-06 06:16:31 +09:00
2013-06-24 02:58:33 +09:00
def _real_extract(self, url):
    """Extract formats and metadata for a single watch URL.

    Steps, in order:
      1. download the watch page and obtain the player response (from the
         page, falling back to the ``player`` API);
      2. if the response looks age-gated, retry through the embedded TV
         client;
      3. return early for a trailer redirect or a multifeed playlist;
      4. build the format list from ``streamingData`` plus the HLS/DASH
         manifests, dropping DRM formats;
      5. collect thumbnails, subtitles, chapters, music metadata and
         like counts from the player response and the initial data.

    Returns an info dict (or a ``url_result``/``playlist_result``).
    Raises ExtractorError when no formats are available and a reason (DRM,
    playability error) is known.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)
    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    player_response = None
    player_url = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')
    if not player_response:
        player_response = self._call_api(
            'player', {'videoId': video_id}, video_id)

    def is_agegated(playability):
        if not isinstance(playability, dict):
            return

        if playability.get('desktopLegacyAgeGateReason'):
            return True

        # Must be a list: it is iterated once per expected phrase below, and
        # a one-shot filter() iterator would be exhausted after the first
        reasons = [
            value for value in (playability.get(r) for r in ('status', 'reason'))
            if value]
        AGE_GATE_REASONS = (
            'confirm your age', 'age-restricted', 'inappropriate',  # reason
            'age_verification_required', 'age_check_required',  # status
        )
        return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)

    def get_playability_status(response):
        return try_get(response, lambda x: x['playabilityStatus'], dict) or {}

    playability_status = get_playability_status(player_response)
    if (is_agegated(playability_status)
            and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):

        self.report_age_confirmation()

        # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
        pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}

        # Use signatureTimestamp if available
        # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
        player_url = self._extract_player_url(webpage)
        ytcfg = self._extract_ytcfg(video_id, webpage)
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
        if sts:
            pb_context['signatureTimestamp'] = sts

        query = {
            'playbackContext': {'contentPlaybackContext': pb_context},
            'contentCheckOk': True,
            'racyCheckOk': True,
            'context': {
                'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
                'thirdParty': {'embedUrl': 'https://google.com'},
            },
            'videoId': video_id,
        }
        headers = {
            'X-YouTube-Client-Name': '85',
            'X-YouTube-Client-Version': '2.0',
            'Origin': 'https://www.youtube.com'
        }

        video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
        age_gate_status = get_playability_status(video_info)
        if age_gate_status.get('status') == 'OK':
            player_response = video_info
            playability_status = age_gate_status

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    def get_text(x):
        # Text of a renderer text object: 'simpleText', else joined 'runs'
        if not x:
            return
        text = x.get('simpleText')
        if text and isinstance(text, compat_str):
            return text
        runs = x.get('runs')
        if not isinstance(runs, list):
            return
        return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])

    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if not player_url:
        player_url = self._extract_player_url(webpage)

    formats = []
    # itag -> set of (protocol, language) already emitted, used to de-duplicate
    itags = collections.defaultdict(set)
    itag_qualities = {}
    q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
    CHUNK_SIZE = 10 << 20

    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])

    def build_fragments(f):
        # Lazily yields one ranged URL per CHUNK_SIZE slice of the file
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
            })
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    lower = lambda s: s.lower()

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}

        quality = traverse_obj(fmt, ((
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            # (itag is a string here, so compare against '17')
            T(lambda _: 'tiny' if itag == '17' else None),
            ('quality', T(lambda v: v if v and v != 'tiny' else None)),
            ('audioQuality', T(lower)),
            'quality'), T(txt_or_none)), get_all=False)
        if quality and itag:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # A missing signatureCipher parses to an empty dict and is skipped
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
            encrypted_sig = traverse_obj(sc, ('s', -1))
            if not (fmt_url and encrypted_sig):
                continue
            player_url = player_url or self._extract_player_url(webpage)
            if not player_url:
                continue
            try:
                fmt_url = update_url_query(fmt_url, {
                    traverse_obj(sc, ('sp', -1)) or 'signature':
                        [self._decrypt_signature(encrypted_sig, video_id, player_url)],
                })
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(error_to_compat_str(e), only_once=True)
                continue

        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
            else -1)
        # quality may be None when the format carries no quality fields
        name = (
            traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
            or (quality or '').replace('audio_quality_', ''))
        dct = {
            'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
            'url': fmt_url,
            # Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': ((-5 if itag == '22' else -1)
                                  + (100 if 'Premium' in name else 0)),
            'quality': q(quality),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize 3gp formats
            'preference': -2 if itag == '17' else None,
        }
        if itag:
            itags[itag].add(('https', dct.get('language')))
        self._unthrottle_format_urls(video_id, player_url, dct)
        dct.update(traverse_obj(fmt, {
            'asr': ('audioSampleRate', T(int_or_none)),
            'filesize': ('contentLength', T(int_or_none)),
            'format_note': ('qualityLabel', T(lambda x: x or quality)),
            # for some formats, fps is wrongly returned as 1
            'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
            'audio_channels': ('audioChannels', T(int_or_none)),
            'height': ('height', T(int_or_none)),
            'has_drm': ('drmFamilies', T(bool)),
            'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
            'width': ('width', T(int_or_none)),
            '_duration_ms': ('approxDurationMs', T(int_or_none)),
        }, get_all=False))
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'
        if single_stream or itag == '17':
            # avoid Youtube throttling
            # ('filesize' is absent when contentLength was not supplied)
            dct.update({
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            } if dct.get('filesize') else {
                'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
            })

        formats.append(dct)

    def process_manifest_format(f, proto, client_name, itag, all_formats=False):
        # Returns False for a (proto, language) already seen for this itag
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if itag:
            f['format_id'] = (
                '{0}-{1}'.format(itag, proto)
                if all_formats or any(p != proto for p, _ in itags[itag])
                else itag)

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(traverse_obj(f, (
            'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
        if try_call(lambda: f['fps'] <= 1):
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    hls_manifest_url = streaming_data.get('hlsManifestUrl')
    if hls_manifest_url:
        for f in self._extract_m3u8_formats(
                hls_manifest_url, video_id, 'mp4', fatal=False):
            if process_manifest_format(
                    f, 'hls', None, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                formats.append(f)

    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_manifest_url = streaming_data.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(
                        f, 'dash', None, f['format_id']):
                    f['filesize'] = traverse_obj(f, (
                        ('fragment_base_url', 'url'), T(lambda u: self._search_regex(
                            r'/clen/(\d+)', u, 'file size', default=None)),
                        T(int_or_none)), get_all=False)
                    formats.append(f)

    playable_formats = [f for f in formats if not f.get('has_drm')]
    if formats and not playable_formats:
        # If there are no formats that definitely don't have DRM, all have DRM
        self.report_drm(video_id)
    formats[:] = playable_formats

    if not formats:
        if streaming_data.get('licenseInfos'):
            raise ExtractorError(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    subreason, countries)
            # reason may be None here, so do not concatenate directly
            reason = join_nonempty(reason, subreason, delim='\n')
        if reason:
            raise ExtractorError(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []:
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'height': int_or_none(thumbnail.get('height')),
                'url': update_url(thumbnail_url, query=None, fragment=None),
                'width': int_or_none(thumbnail.get('width')),
            })
        if thumbnails:
            break
    else:
        # Neither container yielded thumbnails: fall back to page metadata
        thumbnail = search_meta(['og:image', 'twitter:image'])
        if thumbnail:
            thumbnails = [{'url': thumbnail}]

    category = microformat.get('category') or search_meta('genre')
    channel_id = self._extract_channel_id(
        webpage, videodetails=video_details, metadata=microformat)
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))

    for f in formats:
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # but avoid false positives with small duration differences.
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
            self.report_warning(
                '{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
            # Strictly de-prioritize damaged formats
            f['preference'] = -10

    is_live = video_details.get('isLive')

    owner_profile_url = self._yt_urljoin(self._extract_author_var(
        webpage, 'url', videodetails=video_details, metadata=microformat))

    uploader = self._extract_author_var(
        webpage, 'name', videodetails=video_details, metadata=microformat)

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        'uploader': uploader,
        'channel_id': channel_id,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    if pctr:
        def process_language(container, track_url, lang_code, query):
            # One entry per subtitle format, differing only in the 'fmt' query
            lang_subs = []
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(track_url, query),
                })
            container[lang_code] = lang_subs

        subtitles = {}
        for caption_track in (pctr.get('captionTracks') or []):
            # Named caption_url so the site base_url above is not shadowed
            caption_url = caption_track.get('baseUrl')
            if not caption_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = caption_track.get('languageCode')
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_url, lang_code, {})
                continue
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, caption_url, translation_language_code,
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times may be given in the URL fragment or query (t/start/end)
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    if video_description:
        # Youtube Music Auto-generated description
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._call_api(
            'next', {'videoId': video_id}, video_id, fatal=False)

    if initial_data:
        chapters = self._extract_chapters_from_json(
            initial_data, video_id, duration)
        if not chapters:
            for engagment_pannel in (initial_data.get('engagementPanels') or []):
                contents = try_get(
                    engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
                    list)
                if not contents:
                    continue

                def chapter_time(mmlir):
                    return parse_duration(
                        get_text(mmlir.get('timeDescription')))

                chapters = []
                for next_num, content in enumerate(contents, start=1):
                    mmlir = content.get('macroMarkersListItemRenderer') or {}
                    start_time = chapter_time(mmlir)
                    # 'or {}' keeps chapter_time from receiving None when the
                    # next item has no macroMarkersListItemRenderer
                    end_time = chapter_time(try_get(
                        contents, lambda x: x[next_num]['macroMarkersListItemRenderer']) or {}) \
                        if next_num < len(contents) else duration
                    if start_time is None or end_time is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': end_time,
                        'title': get_text(mmlir.get('title')),
                    })
                if chapters:
                    break
        if chapters:
            info['chapters'] = chapters

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        if not info['channel_id']:
            channel_id = self._extract_channel_id('', renderers=contents)
        if not info['uploader']:
            info['uploader'] = self._extract_author_var('', 'name', renderers=contents)
        if not owner_profile_url:
            owner_profile_url = self._yt_urljoin(self._extract_author_var('', 'url', renderers=contents))

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        # •? doesn't match, but [•]? does; \xa0 = non-breaking space
                        # (get_text may return None, hence the '' fallback)
                        mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl or '')
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
                else:
                    info['like_count'] = traverse_obj(vpir, (
                        'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
                        'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
                        'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
                        'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
                        get_all=False)

            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between rows is taken to mean several songs,
                # in which case per-song fields are not recorded
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = get_text(mrr['title'])
                    # tolerate rows with missing or empty 'contents'
                    mrr_contents_text = get_text(
                        try_get(mrr, lambda x: x['contents'][0], dict))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

        # this is not extraction but spelunking!
        carousel_lockups = traverse_obj(
            initial_data,
            ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
             'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
             'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
            expected_type=dict) or []
        # try to reproduce logic from metadataRowContainerRenderer above (if it still is)
        fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
        # multiple_songs ?
        if len(carousel_lockups) > 1:
            fields = fields[-1:]
        for info_row in traverse_obj(
                carousel_lockups,
                (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
                expected_type=dict):
            row_title = traverse_obj(info_row, ('title', 'simpleText'))
            row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
            if not row_text:
                continue
            for name, field in fields:
                if name == row_title and not info.get(field):
                    info[field] = row_text

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    self.mark_watched(video_id, player_response)

    return merge_dicts(
        info, {
            'uploader_id': self._extract_uploader_id(owner_profile_url),
            'uploader_url': owner_profile_url,
            'channel_id': channel_id,
            'channel_url': channel_id and self._yt_urljoin('/channel/' + channel_id),
            'channel': info['uploader'],
        })
2013-06-24 02:58:33 +09:00
2014-11-24 04:41:03 +09:00
2020-11-12 08:16:37 +09:00
class YoutubeTabIE ( YoutubeBaseInfoExtractor ) :
IE_DESC = ' YouTube.com tab '
2020-11-24 02:10:25 +09:00
_VALID_URL = r ''' (?x)
https ? : / /
( ? : \w + \. ) ?
( ? :
youtube ( ? : kids ) ? \. com |
invidio \. us
) /
( ? :
2021-04-01 19:50:30 +09:00
( ? : channel | c | user | feed | hashtag ) / |
2020-12-26 21:59:57 +09:00
( ? : playlist | watch ) \? . * ? \blist = |
2022-01-30 04:11:47 +09:00
( ? ! ( ? : watch | embed | v | e | results ) \b )
2020-11-24 02:10:25 +09:00
)
( ? P < id > [ ^ / ? \#&]+)
'''
2020-11-12 08:16:37 +09:00
IE_NAME = ' youtube:tab '
2014-09-13 14:19:20 +09:00
_TESTS = [ {
2023-02-03 02:26:31 +09:00
# Shorts
' url ' : ' https://www.youtube.com/@SuperCooperShorts/shorts ' ,
' playlist_mincount ' : 5 ,
' info_dict ' : {
' description ' : ' Short clips from Super Cooper Sundays! ' ,
' id ' : ' UCKMA8kHZ8bPYpnMNaUSxfEQ ' ,
' title ' : ' Super Cooper Shorts - Shorts ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Super Cooper Shorts ' ,
' uploader_id ' : ' @SuperCooperShorts ' ,
2023-02-03 02:26:31 +09:00
}
} , {
# Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
' url ' : ' https://www.youtube.com/@emergencyawesome/shorts ' ,
' info_dict ' : {
' description ' : ' md5:592c080c06fef4de3c902c4a8eecd850 ' ,
' id ' : ' UCDiFRMQWpcp8_KD4vwIVicw ' ,
' title ' : ' Emergency Awesome - Home ' ,
} ,
' playlist_mincount ' : 5 ,
2023-02-24 11:48:37 +09:00
' skip ' : ' new test page needed to replace `Emergency Awesome - Shorts` ' ,
2023-02-03 02:26:31 +09:00
} , {
2020-11-12 08:16:37 +09:00
# playlists, multipage
' url ' : ' https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid ' ,
' playlist_mincount ' : 94 ,
' info_dict ' : {
' id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
2024-01-16 03:38:43 +09:00
' title ' : r ' re:Igor Kleiner(?: Ph \ .D \ .)? - Playlists ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:be97ee0f14ee314f1f002cf187166ee2 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Igor Kleiner ' ,
' uploader_id ' : ' @IgorDataScience ' ,
2020-11-12 08:16:37 +09:00
} ,
} , {
# playlists, multipage, different order
' url ' : ' https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd ' ,
' playlist_mincount ' : 94 ,
' info_dict ' : {
' id ' : ' UCqj7Cz7revf5maW9g5pgNcg ' ,
2024-01-16 03:38:43 +09:00
' title ' : r ' re:Igor Kleiner(?: Ph \ .D \ .)? - Playlists ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:be97ee0f14ee314f1f002cf187166ee2 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Igor Kleiner ' ,
' uploader_id ' : ' @IgorDataScience ' ,
2020-11-12 08:16:37 +09:00
} ,
2021-04-10 22:11:35 +09:00
} , {
# playlists, series
' url ' : ' https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3 ' ,
' playlist_mincount ' : 5 ,
' info_dict ' : {
' id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
' title ' : ' 3Blue1Brown - Playlists ' ,
' description ' : ' md5:e1384e8a133307dd10edee76e875d62f ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' 3Blue1Brown ' ,
' uploader_id ' : ' @3blue1brown ' ,
2021-04-10 22:11:35 +09:00
} ,
2020-11-12 08:16:37 +09:00
} , {
# playlists, singlepage
' url ' : ' https://www.youtube.com/user/ThirstForScience/playlists ' ,
' playlist_mincount ' : 4 ,
' info_dict ' : {
2020-11-18 05:32:42 +09:00
' id ' : ' UCAEtajcuhQ6an9WEzY9LEMQ ' ,
' title ' : ' ThirstForScience - Playlists ' ,
' description ' : ' md5:609399d937ea957b0f53cbffb747a14c ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' ThirstForScience ' ,
' uploader_id ' : ' @ThirstForScience ' ,
2020-11-12 08:16:37 +09:00
}
} , {
' url ' : ' https://www.youtube.com/c/ChristophLaimer/playlists ' ,
' only_matching ' : True ,
} , {
# basic, single video playlist
2020-03-03 03:46:00 +09:00
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
2014-09-13 14:19:20 +09:00
' info_dict ' : {
2020-03-03 03:46:00 +09:00
' id ' : ' PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
' title ' : ' youtube-dl public playlist ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Sergey M. ' ,
' uploader_id ' : ' @sergeym.6173 ' ,
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
2014-09-13 14:19:20 +09:00
} ,
2020-03-03 03:46:00 +09:00
' playlist_count ' : 1 ,
2014-09-13 14:31:48 +09:00
} , {
2020-11-12 08:16:37 +09:00
# empty playlist
2020-03-03 03:46:00 +09:00
' url ' : ' https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf ' ,
2014-09-13 14:31:48 +09:00
' info_dict ' : {
2020-03-03 03:46:00 +09:00
' id ' : ' PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf ' ,
' title ' : ' youtube-dl empty playlist ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' Sergey M. ' ,
' uploader_id ' : ' @sergeym.6173 ' ,
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
2014-09-13 14:31:48 +09:00
} ,
' playlist_count ' : 0 ,
2020-11-12 08:16:37 +09:00
} , {
# Home tab
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Home ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 2 ,
} , {
# Videos tab
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Videos ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 975 ,
} , {
# Videos tab, sorted by popular
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Videos ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 199 ,
} , {
# Playlists tab
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Playlists ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 17 ,
} , {
# Community tab
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
' title ' : ' lex will - Community ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 18 ,
} , {
# Channels tab
' url ' : ' https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels ' ,
' info_dict ' : {
' id ' : ' UCKfVa3S1e4PHvxWcwyMMg8w ' ,
2024-01-16 03:38:43 +09:00
' title ' : r ' re:lex will - (?:Home|Channels) ' ,
2020-11-18 05:32:42 +09:00
' description ' : ' md5:2163c5d0ff54ed5f598d6a7e6211e488 ' ,
2023-02-24 11:48:37 +09:00
' uploader ' : ' lex will ' ,
' uploader_id ' : ' @lexwill718 ' ,
2020-11-12 08:16:37 +09:00
} ,
2023-02-24 11:48:37 +09:00
' playlist_mincount ' : 75 ,
2023-04-24 06:58:35 +09:00
} , {
# Releases tab
' url ' : ' https://www.youtube.com/@daftpunk/releases ' ,
' info_dict ' : {
' id ' : ' UC_kRDKYrUlrbtrSiyu5Tflg ' ,
' title ' : ' Daft Punk - Releases ' ,
' description ' : ' Daft Punk (1993 - 2021) - Official YouTube Channel ' ,
' uploader_id ' : ' @daftpunk ' ,
' uploader ' : ' Daft Punk ' ,
} ,
' playlist_mincount ' : 36 ,
2020-11-12 08:16:37 +09:00
} , {
2020-11-21 01:34:46 +09:00
' url ' : ' https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
2020-11-12 08:16:37 +09:00
' only_matching ' : True ,
} , {
2020-11-21 01:34:46 +09:00
' url ' : ' https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
2020-11-12 08:16:37 +09:00
' only_matching ' : True ,
} , {
2020-11-21 01:34:46 +09:00
' url ' : ' https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
2020-11-12 08:16:37 +09:00
' only_matching ' : True ,
2014-09-13 14:31:48 +09:00
} , {
' note ' : ' Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list. ' ,
' url ' : ' https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC ' ,
' info_dict ' : {
' title ' : ' 29C3: Not my department ' ,
2015-02-01 23:33:32 +09:00
' id ' : ' PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC ' ,
2019-07-21 02:01:46 +09:00
' uploader ' : ' Christiaan008 ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @ChRiStIaAn008 ' ,
' channel_id ' : ' UCEPzS1rYsrkqzSLNp76nrcg ' ,
2014-09-13 14:31:48 +09:00
} ,
2020-03-03 03:46:00 +09:00
' playlist_count ' : 96 ,
2014-09-13 14:31:48 +09:00
} , {
' note ' : ' Large playlist ' ,
' url ' : ' https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q ' ,
' info_dict ' : {
' title ' : ' Uploads from Cauchemar ' ,
2015-02-01 23:33:32 +09:00
' id ' : ' UUBABnxM4Ar9ten8Mdjj1j0Q ' ,
2019-07-21 02:01:46 +09:00
' uploader ' : ' Cauchemar ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @Cauchemar89 ' ,
' channel_id ' : ' UCBABnxM4Ar9ten8Mdjj1j0Q ' ,
2014-09-13 14:31:48 +09:00
} ,
2020-11-12 08:16:37 +09:00
' playlist_mincount ' : 1123 ,
} , {
# even larger playlist, 8832 videos
' url ' : ' http://www.youtube.com/user/NASAgovVideo/videos ' ,
' only_matching ' : True ,
} , {
' note ' : ' Buggy playlist: the webpage has a " Load more " button but it doesn \' t have more videos ' ,
' url ' : ' https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA ' ,
' info_dict ' : {
' title ' : ' Uploads from Interstellar Movie ' ,
' id ' : ' UUXw-G3eDE9trcvY2sBMM_aA ' ,
' uploader ' : ' Interstellar Movie ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @InterstellarMovie ' ,
' channel_id ' : ' UCXw-G3eDE9trcvY2sBMM_aA ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 21 ,
} , {
# https://github.com/ytdl-org/youtube-dl/issues/21844
' url ' : ' https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba ' ,
' info_dict ' : {
' title ' : ' Data Analysis with Dr Mike Pound ' ,
' id ' : ' PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba ' ,
' uploader ' : ' Computerphile ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @Computerphile ' ,
' channel_id ' : ' UC9-y-6csu5WGm29I7JiwpnA ' ,
2020-11-12 08:16:37 +09:00
} ,
' playlist_mincount ' : 11 ,
} , {
2020-11-21 01:34:46 +09:00
' url ' : ' https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc ' ,
2020-11-12 08:16:37 +09:00
' only_matching ' : True ,
2020-11-18 05:32:42 +09:00
} , {
# Playlist URL that does not actually serve a playlist
' url ' : ' https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4 ' ,
' info_dict ' : {
' id ' : ' FqZTN594JQw ' ,
' ext ' : ' webm ' ,
' title ' : " Smiley ' s People 01 detective, Adventure Series, Action " ,
' uploader ' : ' STREEM ' ,
' uploader_id ' : ' UCyPhqAZgwYWZfxElWVbVJng ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/channel/UCyPhqAZgwYWZfxElWVbVJng ' ,
' upload_date ' : ' 20150526 ' ,
' license ' : ' Standard YouTube License ' ,
' description ' : ' md5:507cdcb5a49ac0da37a920ece610be80 ' ,
' categories ' : [ ' People & Blogs ' ] ,
' tags ' : list ,
' view_count ' : int ,
' like_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' This video is not available. ' ,
' add_ie ' : [ YoutubeIE . ie_key ( ) ] ,
} , {
' url ' : ' https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM ' ,
' only_matching ' : True ,
2020-11-21 22:48:13 +09:00
} , {
' url ' : ' https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live ' ,
' info_dict ' : {
2023-02-24 11:48:37 +09:00
' id ' : r ' re:[ \ da-zA-Z_-] { 8,} ' ,
2020-11-21 22:48:13 +09:00
' ext ' : ' mp4 ' ,
2023-02-24 11:48:37 +09:00
' title ' : r ' re:(?s)[A-Z]. { 20,} ' ,
2020-11-21 22:48:13 +09:00
' uploader ' : ' Sky News ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @SkyNews ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/@SkyNews ' ,
' upload_date ' : r ' re: \ d {8} ' ,
' description ' : r ' re:(?s)(?:.* \ n)+SUBSCRIBE to our YouTube channel for more videos: http://www \ .youtube \ .com/skynews * \ n.* ' ,
2020-11-21 22:48:13 +09:00
' categories ' : [ ' News & Politics ' ] ,
' tags ' : list ,
' like_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} , {
' url ' : ' https://www.youtube.com/user/TheYoungTurks/live ' ,
' info_dict ' : {
' id ' : ' a48o2S1cPoo ' ,
' ext ' : ' mp4 ' ,
' title ' : ' The Young Turks - Live Main Show ' ,
' uploader ' : ' The Young Turks ' ,
' uploader_id ' : ' TheYoungTurks ' ,
' uploader_url ' : r ' re:https?://(?:www \ .)?youtube \ .com/user/TheYoungTurks ' ,
' upload_date ' : ' 20150715 ' ,
' license ' : ' Standard YouTube License ' ,
' description ' : ' md5:438179573adcdff3c97ebb1ee632b891 ' ,
' categories ' : [ ' News & Politics ' ] ,
' tags ' : [ ' Cenk Uygur (TV Program Creator) ' , ' The Young Turks (Award-Winning Work) ' , ' Talk Show (TV Genre) ' ] ,
' like_count ' : int ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/c/CommanderVideoHq/live ' ,
' only_matching ' : True ,
2020-11-24 02:10:25 +09:00
} , {
' url ' : ' https://www.youtube.com/feed/trending ' ,
' only_matching ' : True ,
} , {
# needs auth
' url ' : ' https://www.youtube.com/feed/library ' ,
' only_matching ' : True ,
} , {
# needs auth
' url ' : ' https://www.youtube.com/feed/history ' ,
' only_matching ' : True ,
} , {
# needs auth
' url ' : ' https://www.youtube.com/feed/subscriptions ' ,
' only_matching ' : True ,
} , {
# needs auth
' url ' : ' https://www.youtube.com/feed/watch_later ' ,
' only_matching ' : True ,
} , {
# no longer available?
' url ' : ' https://www.youtube.com/feed/recommended ' ,
' only_matching ' : True ,
2020-12-09 05:40:02 +09:00
} , {
# inline playlist with not always working continuations
' url ' : ' https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C ' ,
' only_matching ' : True ,
2020-12-26 21:59:57 +09:00
} , {
' url ' : ' https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8 ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/course ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/zsecurity ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://www.youtube.com/NASAgovVideo/videos ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://www.youtube.com/TheYoungTurks/live ' ,
' only_matching ' : True ,
2021-04-01 19:50:30 +09:00
} , {
' url ' : ' https://www.youtube.com/hashtag/cctv9 ' ,
' info_dict ' : {
' id ' : ' cctv9 ' ,
' title ' : ' #cctv9 ' ,
} ,
' playlist_mincount ' : 350 ,
2021-04-17 02:07:32 +09:00
} , {
' url ' : ' https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU ' ,
' only_matching ' : True ,
2022-01-30 04:54:09 +09:00
} , {
' note ' : ' Search tab ' ,
' url ' : ' https://www.youtube.com/c/3blue1brown/search?query=linear %20a lgebra ' ,
2023-02-24 11:48:37 +09:00
' playlist_mincount ' : 20 ,
2022-01-30 04:54:09 +09:00
' info_dict ' : {
' id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
' title ' : ' 3Blue1Brown - Search - linear algebra ' ,
' description ' : ' md5:e1384e8a133307dd10edee76e875d62f ' ,
' uploader ' : ' 3Blue1Brown ' ,
2023-02-24 11:48:37 +09:00
' uploader_id ' : ' @3blue1brown ' ,
' channel_id ' : ' UCYO_jab_esuFRV4b17AJtAw ' ,
2022-01-30 04:54:09 +09:00
}
2020-12-26 21:59:57 +09:00
} ]
2020-11-12 08:16:37 +09:00
2020-12-29 04:19:43 +09:00
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should be used for `url`.

    A URL that YoutubeIE declares suitable is always declined here;
    for any other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
2020-11-12 08:16:37 +09:00
@staticmethod
def _extract_grid_item_renderer ( item ) :
2021-04-17 03:05:44 +09:00
assert isinstance ( item , dict )
for key , renderer in item . items ( ) :
if not key . startswith ( ' grid ' ) or not key . endswith ( ' Renderer ' ) :
continue
if not isinstance ( renderer , dict ) :
continue
return renderer
2020-11-12 08:16:37 +09:00
2023-04-24 06:58:35 +09:00
@staticmethod
def _get_text(r, k):
    """Look up the text stored under key `k` of renderer `r`.

    Two locations are offered to traverse_obj: the text of the first
    entry of the `runs` list, and the `simpleText` value. The result is
    filtered through txt_or_none.
    """
    runs_path = (k, ' runs ', 0, ' text ')
    simple_path = (k, ' simpleText ')
    return traverse_obj(r, runs_path, simple_path, expected_type=txt_or_none)
2020-11-12 08:16:37 +09:00
def _grid_entries ( self , grid_renderer ) :
for item in grid_renderer [ ' items ' ] :
if not isinstance ( item , dict ) :
continue
renderer = self . _extract_grid_item_renderer ( item )
if not isinstance ( renderer , dict ) :
continue
2023-04-24 06:58:35 +09:00
title = self . _get_text ( renderer , ' title ' )
2020-11-12 08:16:37 +09:00
# playlist
playlist_id = renderer . get ( ' playlistId ' )
if playlist_id :
yield self . url_result (
' https://www.youtube.com/playlist?list= %s ' % playlist_id ,
ie = YoutubeTabIE . ie_key ( ) , video_id = playlist_id ,
video_title = title )
2021-04-17 03:05:44 +09:00
continue
2020-11-12 08:16:37 +09:00
# video
video_id = renderer . get ( ' videoId ' )
if video_id :
yield self . _extract_video ( renderer )
2021-04-17 03:05:44 +09:00
continue
2020-11-12 08:16:37 +09:00
# channel
channel_id = renderer . get ( ' channelId ' )
if channel_id :
2023-04-24 06:58:35 +09:00
title = self . _get_text ( renderer , ' title ' )
2020-11-12 08:16:37 +09:00
yield self . url_result (
' https://www.youtube.com/channel/ %s ' % channel_id ,
ie = YoutubeTabIE . ie_key ( ) , video_title = title )
2021-04-17 03:05:44 +09:00
continue
# generic endpoint URL support
ep_url = urljoin ( ' https://www.youtube.com/ ' , try_get (
renderer , lambda x : x [ ' navigationEndpoint ' ] [ ' commandMetadata ' ] [ ' webCommandMetadata ' ] [ ' url ' ] ,
compat_str ) )
if ep_url :
for ie in ( YoutubeTabIE , YoutubePlaylistIE , YoutubeIE ) :
if ie . suitable ( ep_url ) :
2021-04-10 22:11:35 +09:00
yield self . url_result (
2021-04-17 03:05:44 +09:00
ep_url , ie = ie . ie_key ( ) , video_id = ie . _match_id ( ep_url ) , video_title = title )
break
2020-11-12 08:16:37 +09:00
2020-11-24 02:10:25 +09:00
def _shelf_entries_from_content ( self , shelf_renderer ) :
content = shelf_renderer . get ( ' content ' )
if not isinstance ( content , dict ) :
2020-11-12 08:16:37 +09:00
return
2020-11-24 02:10:25 +09:00
renderer = content . get ( ' gridRenderer ' )
if renderer :
# TODO: add support for nested playlists so each shelf is processed
# as separate playlist
# TODO: this includes only first N items
for entry in self . _grid_entries ( renderer ) :
yield entry
renderer = content . get ( ' horizontalListRenderer ' )
if renderer :
# TODO
pass
2020-11-12 08:16:37 +09:00
2020-12-01 06:44:10 +09:00
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf carries an endpoint URL, a url_result for it is
    yielded first, unless `skip_channels` is set and the URL contains
    the channels marker, in which case nothing at all is yielded.
    Entries found in the shelf content are yielded afterwards.
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x[' endpoint '][' commandMetadata '][' webCommandMetadata '][' url '],
        compat_str)
    shelf_url = urljoin(' https://www.youtube.com ', endpoint)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and ' /channels? ' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x[' title '][' runs '][0][' text '], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not contain a shelf URL; fall back to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
2020-11-12 08:16:37 +09:00
def _playlist_entries ( self , video_list_renderer ) :
for content in video_list_renderer [ ' contents ' ] :
if not isinstance ( content , dict ) :
continue
2020-11-18 05:32:42 +09:00
renderer = content . get ( ' playlistVideoRenderer ' ) or content . get ( ' playlistPanelVideoRenderer ' )
2020-11-12 08:16:37 +09:00
if not isinstance ( renderer , dict ) :
continue
video_id = renderer . get ( ' videoId ' )
if not video_id :
continue
yield self . _extract_video ( renderer )
def _video_entry ( self , video_renderer ) :
video_id = video_renderer . get ( ' videoId ' )
if video_id :
return self . _extract_video ( video_renderer )
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos referenced by a community post.

    The attached video (if any) is yielded first, followed by a
    url_result for every inline link in the post text that YoutubeIE
    accepts. A link pointing at the attached video is not repeated.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x[' post '][' backstagePostRenderer '], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x[' backstageAttachment '][' videoRenderer '], dict)
    video_id = None
    if video_renderer:
        # Remember the attachment's id so inline links to it can be skipped
        video_id = video_renderer.get(' videoId ')
        entry = self._video_entry(video_renderer)
        if entry:
            yield entry
    # inline video links
    runs = try_get(post_renderer, lambda x: x[' contentText '][' runs '], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x[' navigationEndpoint '][' urlEndpoint '][' url '], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            continue
        # Report the id of the linked video, not that of the attachment
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
def _post_thread_continuation_entries ( self , post_thread_continuation ) :
contents = post_thread_continuation . get ( ' contents ' )
if not isinstance ( contents , list ) :
return
for content in contents :
renderer = content . get ( ' backstagePostThreadRenderer ' )
if not isinstance ( renderer , dict ) :
continue
for entry in self . _post_thread_entries ( renderer ) :
yield entry
2021-04-01 19:50:30 +09:00
def _rich_grid_entries ( self , contents ) :
for content in contents :
2023-04-24 06:58:35 +09:00
content = traverse_obj (
content , ( ' richItemRenderer ' , ' content ' ) ,
expected_type = dict ) or { }
video_renderer = traverse_obj (
content , ' videoRenderer ' , ' reelItemRenderer ' ,
expected_type = dict )
2021-04-01 19:50:30 +09:00
if video_renderer :
entry = self . _video_entry ( video_renderer )
if entry :
yield entry
2023-04-24 06:58:35 +09:00
# playlist
renderer = traverse_obj (
content , ' playlistRenderer ' , expected_type = dict ) or { }
title = self . _get_text ( renderer , ' title ' )
playlist_id = renderer . get ( ' playlistId ' )
if playlist_id :
yield self . url_result (
' https://www.youtube.com/playlist?list= %s ' % playlist_id ,
ie = YoutubeTabIE . ie_key ( ) , video_id = playlist_id ,
video_title = title )
2021-04-01 19:50:30 +09:00
2020-12-09 06:02:50 +09:00
@staticmethod
def _build_continuation_query ( continuation , ctp = None ) :
query = {
' ctoken ' : continuation ,
' continuation ' : continuation ,
}
if ctp :
query [ ' itct ' ] = ctp
return query
2020-11-12 08:16:37 +09:00
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from the renderer's first continuation.

    Returns None when the renderer has no next-continuation data or
    that data holds no continuation token.
    """
    next_data = try_get(
        renderer, lambda x: x[' continuations '][0][' nextContinuationData '], dict)
    token = next_data.get(' continuation ') if next_data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get(' clickTrackingParams '))
2020-11-12 08:16:37 +09:00
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None.

    The renderer's next-continuation data is preferred. Failing that,
    the renderer's contents and items lists are scanned, in that order,
    for the first continuation item whose endpoint holds a token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x[' contents '], list) or [])
        + (try_get(renderer, lambda x: x[' items '], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x[' continuationItemRenderer '][' continuationEndpoint '],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x[' continuationCommand '][' token '], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get(' clickTrackingParams '))
2020-11-12 08:16:37 +09:00
2021-04-09 04:09:52 +09:00
def _entries(self, tab, item_id, webpage):
    """Yield every entry of `tab`, following its continuation pages.

    The entries embedded in the tab content are yielded first: either
    from a section list renderer (playlist, grid, shelf, post thread
    and video renderers) or from a rich grid renderer. After that,
    continuation pages are downloaded one by one for as long as a
    continuation can be extracted from the previous page.

    `item_id` and `webpage` are only used to obtain the ytcfg data and
    the identity token for the continuation requests.
    """
    tab_content = try_get(tab, lambda x: x[' content '], dict)
    if not tab_content:
        return
    slr_renderer = try_get(tab_content, lambda x: x[' sectionListRenderer '], dict)
    if slr_renderer:
        is_channels_tab = tab.get(' title ') == ' Channels '
        continuation = None
        slr_contents = try_get(slr_renderer, lambda x: x[' contents '], list) or []
        for slr_content in slr_contents:
            if not isinstance(slr_content, dict):
                continue
            is_renderer = try_get(slr_content, lambda x: x[' itemSectionRenderer '], dict)
            if not is_renderer:
                continue
            isr_contents = try_get(is_renderer, lambda x: x[' contents '], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue
                renderer = isr_content.get(' playlistVideoListRenderer ')
                if renderer:
                    for entry in self._playlist_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get(' gridRenderer ')
                if renderer:
                    for entry in self._grid_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get(' shelfRenderer ')
                if renderer:
                    # Channel links inside shelves are only kept on the channels tab
                    for entry in self._shelf_entries(renderer, not is_channels_tab):
                        yield entry
                    continue
                renderer = isr_content.get(' backstagePostThreadRenderer ')
                if renderer:
                    for entry in self._post_thread_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation(renderer)
                    continue
                renderer = isr_content.get(' videoRenderer ')
                if renderer:
                    entry = self._video_entry(renderer)
                    if entry:
                        yield entry
            # Fall back to the enclosing renderers when the inner ones gave no continuation
            if not continuation:
                continuation = self._extract_continuation(is_renderer)
        if not continuation:
            continuation = self._extract_continuation(slr_renderer)
    else:
        rich_grid_renderer = tab_content.get(' richGridRenderer ')
        if not rich_grid_renderer:
            return
        for entry in self._rich_grid_entries(rich_grid_renderer.get(' contents ') or []):
            yield entry

        continuation = self._extract_continuation(rich_grid_renderer)

    ytcfg = self._extract_ytcfg(item_id, webpage)
    client_version = try_get(
        ytcfg, lambda x: x[' INNERTUBE_CLIENT_VERSION '], compat_str) or ' 2.20210407.08.00 '
    headers = {
        ' x-youtube-client-name ': ' 1 ',
        ' x-youtube-client-version ': client_version,
        ' content-type ': ' application/json ',
    }
    context = try_get(ytcfg, lambda x: x[' INNERTUBE_CONTEXT '], dict) or {
        ' client ': {
            ' clientName ': ' WEB ',
            ' clientVersion ': client_version,
        }
    }
    visitor_data = try_get(context, lambda x: x[' client '][' visitorData '], compat_str)
    identity_token = self._extract_identity_token(ytcfg, webpage)
    if identity_token:
        headers[' x-youtube-identity-token '] = identity_token
    data = {
        ' context ': context,
    }
    for page_num in itertools.count(1):
        if not continuation:
            break
        if visitor_data:
            headers[' x-goog-visitor-id '] = visitor_data
        data[' continuation '] = continuation[' continuation ']
        # The tracking value is optional in a continuation query, so it must
        # not be indexed unconditionally; a value left over from the previous
        # page is removed rather than resent.
        click_tracking_params = continuation.get(' itct ')
        if click_tracking_params:
            data[' clickTracking '] = {
                ' clickTrackingParams ': click_tracking_params
            }
        else:
            data.pop(' clickTracking ', None)
        count = 0
        retries = 3
        while count <= retries:
            try:
                # Downloading page may result in intermittent 5xx HTTP error
                # that is usually worked around with a retry
                response = self._download_json(
                    ' https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8 ',
                    None, ' Downloading page %d %s ' % (page_num, ' (retry # %d ) ' % count if count else ' '),
                    headers=headers, data=json.dumps(data).encode(' utf8 '))
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                    count += 1
                    if count <= retries:
                        continue
                raise
        if not response:
            break
        # Keep using the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x[' responseContext '][' visitorData '], compat_str) or visitor_data
        continuation_contents = try_get(
            response, lambda x: x[' continuationContents '], dict)
        if continuation_contents:
            continuation_renderer = continuation_contents.get(' playlistVideoListContinuation ')
            if continuation_renderer:
                for entry in self._playlist_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get(' gridContinuation ')
            if continuation_renderer:
                for entry in self._grid_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            continuation_renderer = continuation_contents.get(' itemSectionContinuation ')
            if continuation_renderer:
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
        on_response_received = dict_get(response, (' onResponseReceivedActions ', ' onResponseReceivedEndpoints '))
        continuation_items = try_get(
            on_response_received, lambda x: x[0][' appendContinuationItemsAction '][' continuationItems '], list)
        if continuation_items:
            # The kind of the first item decides how the whole list is processed
            continuation_item = continuation_items[0]
            if not isinstance(continuation_item, dict):
                # Nothing usable here; stop instead of requesting the very
                # same continuation again without end
                break
            renderer = self._extract_grid_item_renderer(continuation_item)
            if renderer:
                grid_renderer = {' items ': continuation_items}
                for entry in self._grid_entries(grid_renderer):
                    yield entry
                continuation = self._extract_continuation(grid_renderer)
                continue
            renderer = continuation_item.get(' playlistVideoRenderer ') or continuation_item.get(' itemSectionRenderer ')
            if renderer:
                video_list_renderer = {' contents ': continuation_items}
                for entry in self._playlist_entries(video_list_renderer):
                    yield entry
                continuation = self._extract_continuation(video_list_renderer)
                continue
            renderer = continuation_item.get(' backstagePostThreadRenderer ')
            if renderer:
                continuation_renderer = {' contents ': continuation_items}
                for entry in self._post_thread_continuation_entries(continuation_renderer):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                continue
            renderer = continuation_item.get(' richItemRenderer ')
            if renderer:
                for entry in self._rich_grid_entries(continuation_items):
                    yield entry
                continuation = self._extract_continuation({' contents ': continuation_items})
                continue
        # No recognised continuation payload: pagination is over
        break
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Each tab is looked up under 'tabRenderer' and then
    'expandableTabRenderer'; tabs carrying neither are treated as an
    empty renderer and therefore never match.

    Raises ExtractorError when no tab is selected.
    """
    renderer_keys = ('tabRenderer', 'expandableTabRenderer')
    renderers = (dict_get(tab, renderer_keys) or {} for tab in tabs)
    # Only an explicit boolean True counts, not merely a truthy value
    chosen = next(
        (candidate for candidate in renderers
         if candidate.get('selected') is True),
        None)
    if chosen is None:
        raise ExtractorError('Unable to find selected tab')
    return chosen
2023-02-24 11:48:37 +09:00
def _extract_uploader(self, metadata, data):
    """Build the uploader/channel fields for a playlist-like result.

    metadata is forwarded to the channel-id and author helpers; data is
    the page data whose playlist sidebar items are passed to the same
    helpers as renderers.

    Returns a dict with the keys 'channel_id', 'uploader',
    'uploader_url', 'uploader_id' and 'channel' ('channel' always
    mirrors 'uploader').
    """
    sidebar_items = traverse_obj(
        data, ('sidebar', 'playlistSidebarRenderer', 'items'))
    channel_id = self._extract_channel_id(
        '', metadata=metadata, renderers=sidebar_items)
    # The sidebar name is tried first; metadata is only the fallback
    author_name = (
        self._extract_author_var('', 'name', renderers=sidebar_items)
        or self._extract_author_var('', 'name', metadata=metadata))
    author_url = self._yt_urljoin(self._extract_author_var(
        '', 'url', metadata=metadata, renderers=sidebar_items))
    return {
        'channel_id': channel_id,
        'uploader': author_name,
        'uploader_url': author_url,
        'uploader_id': self._extract_uploader_id(author_url),
        'channel': author_name,
    }
2023-04-24 06:58:35 +09:00
@classmethod
def _extract_alert(cls, data):
    """Collect the alert messages in data into one newline-joined string.

    Entries under data['alerts'] that are not dicts, that have no
    alertRenderer text, or whose text resolves to nothing are skipped.
    The result is '' when no message is found.
    """
    def _messages():
        for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
            raw_text = traverse_obj(
                alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
            if raw_text:
                message = cls._get_text(raw_text, 'text')
                if message:
                    yield message

    return '\n'.join(_messages())
2021-04-09 04:09:52 +09:00
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result from the selected tab of a tabbed page.

    When the page carries channel metadata, the title is assembled from
    the channel title (or item_id), the tab title and the tab's expanded
    text, and the playlist id prefers the channel's externalId.
    Otherwise playlist metadata or a hashtag header supplies the title
    and item_id is used as the playlist id.

    The uploader fields from _extract_uploader() are merged into the
    returned playlist dict.
    """
    selected_tab = self._extract_selected_tab(tabs)
    metadata = traverse_obj(
        data, ('metadata', 'channelMetadataRenderer'),
        expected_type=dict) or {}

    if metadata:
        # Channel page
        title = join_nonempty(
            txt_or_none(metadata.get('title')) or item_id,
            txt_or_none(selected_tab.get('title')),
            txt_or_none(selected_tab.get('expandedText')),
            delim=' - ')
        description = txt_or_none(metadata.get('description'))
        playlist_id = txt_or_none(metadata.get('externalId')) or item_id
    else:
        # Plain playlist or hashtag page
        metadata = traverse_obj(
            data,
            ('metadata', 'playlistMetadataRenderer'),
            ('header', 'hashtagHeaderRenderer'),
            expected_type=dict) or {}
        title = traverse_obj(
            metadata, 'title', ('hashtag', 'simpleText'),
            expected_type=txt_or_none)
        description = None
        playlist_id = item_id

    entries = self._entries(selected_tab, item_id, webpage)
    result = self.playlist_result(
        entries, playlist_id=playlist_id, playlist_title=title,
        playlist_description=description)
    return merge_dicts(result, self._extract_uploader(metadata, data))
2020-11-18 05:32:42 +09:00
2020-12-09 05:40:02 +09:00
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Handle a playlist embedded in a watch page.

    If the playlist advertises its own URL and that URL differs from
    url, a url_result delegating to YoutubeTabIE is returned; otherwise
    the inline playlist entries are returned as a playlist result.
    """
    title = traverse_obj(
        (playlist, data),
        (0, 'title'), (1, 'titleText', 'simpleText'),
        expected_type=txt_or_none)
    playlist_id = txt_or_none(playlist.get('playlistId')) or item_id

    # Inline playlist rendition continuation does not always work
    # at Youtube side, so delegating regular tab-based playlist URL
    # processing whenever possible.
    endpoint_url = traverse_obj(
        playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=url_or_none)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        # Nothing better to delegate to: use the inline rendition
        return self.playlist_result(
            self._playlist_entries(playlist), playlist_id=playlist_id,
            playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
2021-04-09 04:09:52 +09:00
def _extract_identity_token(self, ytcfg, webpage):
    """Return the ID_TOKEN string, or None when it cannot be found.

    ytcfg is consulted first (if non-empty); when it yields no string
    token, the webpage text is searched with a regex instead.
    """
    cfg_token = (
        try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
        if ytcfg else None)
    # The webpage is only scanned when ytcfg gave nothing usable
    return cfg_token or self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
2020-11-18 05:32:42 +09:00
def _real_extract(self, url):
    """Extract a tab page, an inline watch-page playlist or a single video.

    Resolution order after downloading the page:
      1. browse-style tabs      -> _extract_from_tabs()
      2. watch-page playlist    -> _extract_from_playlist()
      3. current video / ?v=    -> url_result for YoutubeIE
      4. page alerts            -> ExtractorError(expected=True)
    Anything else raises a generic ExtractorError.
    """
    item_id = self._match_id(url)
    url = update_url(url, netloc='www.youtube.com')

    def _single_video(vid):
        # Hand a bare video id over to the regular video extractor
        return self.url_result(vid, ie=YoutubeIE.ie_key(), video_id=vid)

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]
    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return _single_video(video_id)
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage = self._download_webpage(url, item_id)
    data = self._extract_yt_initial_data(item_id, webpage)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # Fallback to video extraction if no playlist alike page is recognized.
    # First check for the current video then try the v attribute of URL query.
    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if video_id:
        return _single_video(video_id)

    # Capture and output alerts
    alert = self._extract_alert(data)
    if not alert:
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
    raise ExtractorError(alert, expected=True)
2020-11-12 08:16:37 +09:00
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and assorted ?list= URLs.

    Extraction itself is delegated: the input is rewritten to a
    https://www.youtube.com/playlist URL and handed to YoutubeTabIE.
    """
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        # TODO: full playlist requires _reload_with_unavailable_videos()
        # 'playlist_count': 4,
        'playlist_mincount': 1,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs YoutubeTabIE already accepts and URLs carrying ?v=."""
        # Short-circuit keeps the original check order: tab extractor first
        if YoutubeTabIE.suitable(url) or parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist URL, keeping any query args."""
        playlist_id = self._match_id(url)
        # A bare id has no query string, so synthesise the list parameter
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query(
            'https://www.youtube.com/playlist', query)
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ?list= playlist id.

    The link is expanded to a full watch URL (v + list) and delegated to
    YoutubeTabIE.
    """
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a watch URL for YoutubeTabIE."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query(
            'https://www.youtube.com/watch', watch_query)
        # The result is keyed by the playlist id, not the video id
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
2016-02-19 01:03:46 +09:00
2013-06-24 02:58:33 +09:00
2020-11-12 08:16:37 +09:00
class YoutubeYtUserIE(InfoExtractor):
    """Map the 'ytuser:NAME' shorthand onto the /user/NAME page.

    The resulting URL is delegated to YoutubeTabIE.
    """
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the user's page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
2016-07-09 10:37:02 +09:00
2013-06-24 03:28:15 +09:00
2020-11-24 00:33:21 +09:00
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites shortcuts to the 'LL' playlist.

    Matches the my_favorites URL and the ':ytfav', ':ytfavorites' and
    ':ytfavourites' keywords; extraction is delegated to YoutubeTabIE.
    """
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the fixed 'LL' playlist URL."""
        favourites_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(favourites_url, ie=YoutubeTabIE.ie_key())
2020-11-12 08:16:37 +09:00
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Keyword search extractor ('ytsearchN:query'), restricted to videos."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'ytsearch10:youtube-dl test video',
        'playlist_count': 10,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

    def _get_n_results(self, query, n):
        """Return a playlist holding at most n search results for query.

        An n of float('inf') means no limit. The query doubles as the
        playlist id and title.
        """
        # islice() needs None, not infinity, to mean "no upper bound"
        limit = None if n == float('inf') else n
        results = self._search_results(query, self._SEARCH_PARAMS)
        return self.playlist_result(
            itertools.islice(results, limit), query, query)
2013-07-02 00:59:28 +09:00
2014-03-04 11:32:28 +09:00
2013-11-03 11:40:48 +09:00
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE ('ytsearchdateN:query') sorted by date.

    Only the search key, description and search params differ from the
    parent class.
    """
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _TESTS = [{
        'url': 'ytsearchdate10:youtube-dl test video',
        'playlist_count': 10,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
2013-07-02 00:59:28 +09:00
2014-03-04 11:32:28 +09:00
2022-01-30 04:11:47 +09:00
class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
    """Extract results straight from a youtube.com/results URL.

    The search text comes from 'search_query' (or 'q'), and the optional
    'sp' argument is forwarded unchanged as the search params.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
        'params': {'playlistend': 5}
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist named after the query."""
        args = parse_qs(url)
        # When an argument is repeated, the last occurrence wins
        terms = args.get('search_query') or args.get('q')
        query = terms[-1]
        search_params = args.get('sp', ('',))[-1]
        return self.playlist_result(
            self._search_results(query, search_params), query, query)
2013-07-07 20:58:23 +09:00
2020-11-24 02:10:25 +09:00
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the account feed extractors.

    A subclass supplies _FEED_NAME; it is used both for the extractor
    name ('youtube:<feed>') and for the /feed/<feed> URL that is handed
    over to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def _real_extract(self, url):
        """Return a url_result for this feed's page; url itself is unused."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
2015-05-16 00:06:59 +09:00
2020-11-12 08:16:37 +09:00
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ':ytwatchlater' keyword to the 'WL' playlist.

    Extraction is delegated to YoutubeTabIE.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the fixed 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
2013-10-07 19:21:24 +09:00
2015-05-16 00:06:59 +09:00
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ':ytrec' / ':ytrecommended' keywords."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r':ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }]
2014-09-01 06:44:43 +09:00
2015-05-16 00:06:59 +09:00
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ':ytsubs' / ':ytsubscriptions' keywords."""
    # The advertised keyword must include the leading colon: _VALID_URL
    # below does not match a bare "ytsubs".
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" keyword (requires authentication)'
    _VALID_URL = r':ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
2014-09-01 06:44:43 +09:00
2015-05-16 00:06:59 +09:00
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ':ythistory' keyword."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
2014-09-01 06:44:43 +09:00
2013-10-07 19:21:24 +09:00
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video id.

    Such URLs typically result from an unquoted '&' on the command line;
    matching one always raises an explanatory ExtractorError.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the URL carries no video id to extract."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        # expected=True marks this as a user error rather than a bug
        raise ExtractorError(hint, expected=True)
2015-01-02 07:44:39 +09:00
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ?v= value is 1 to 10 characters long.

    Matching one always raises an ExtractorError reporting the
    incomplete id and the offending URL.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise, naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        # expected=True marks this as a user error rather than a bug
        raise ExtractorError(message, expected=True)