mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-07-21 10:54:13 +09:00
Compare commits
11 Commits
c0ae6e105e
...
03b354a729
Author | SHA1 | Date | |
---|---|---|---|
![]() |
03b354a729 | ||
![]() |
2b4fbfce25 | ||
![]() |
1bc45b8b6c | ||
![]() |
b982d77d0b | ||
![]() |
c55dbf4838 | ||
![]() |
087d865230 | ||
![]() |
a4fc1151f1 | ||
![]() |
a464c159e6 | ||
![]() |
7dca08eff0 | ||
![]() |
2239ee7965 | ||
![]() |
a631e79b33 |
@ -310,6 +310,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--get-filename Simulate, quiet but print output
|
--get-filename Simulate, quiet but print output
|
||||||
filename
|
filename
|
||||||
--get-format Simulate, quiet but print output format
|
--get-format Simulate, quiet but print output format
|
||||||
|
-O, --print TEMPLATE Simulate, quiet but print the given fields.
|
||||||
|
Either a field name or similar formatting
|
||||||
|
as the output template can be used
|
||||||
-j, --dump-json Simulate, quiet but print JSON
|
-j, --dump-json Simulate, quiet but print JSON
|
||||||
information. See the "OUTPUT TEMPLATE"
|
information. See the "OUTPUT TEMPLATE"
|
||||||
for a description of available keys.
|
for a description of available keys.
|
||||||
@ -620,6 +623,12 @@ Available for the media that is a track or a part of a music album:
|
|||||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||||
|
|
||||||
|
Available only when used in `--print`:
|
||||||
|
|
||||||
|
- `urls` (string): The URLs of all requested formats, one in each line
|
||||||
|
- `duration_string` (string): Length of the video (HH:mm:ss)
|
||||||
|
- `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||||
|
|
||||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
|
@ -84,6 +84,21 @@ _SIG_TESTS = [
|
|||||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
_NSIG_TESTS = [
|
_NSIG_TESTS = [
|
||||||
@ -153,7 +168,7 @@ _NSIG_TESTS = [
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||||
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
|
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||||
@ -173,7 +188,7 @@ _NSIG_TESTS = [
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||||
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
||||||
@ -231,10 +246,6 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
|
||||||
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
|
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
|
||||||
),
|
),
|
||||||
(
|
|
||||||
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
|
||||||
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
|
||||||
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
|
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
|
||||||
@ -259,6 +270,22 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||||
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
||||||
|
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||||
|
'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||||
|
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
|
||||||
|
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -271,6 +298,8 @@ class TestPlayerInfo(unittest.TestCase):
|
|||||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
||||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
|
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
|
||||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
|
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
|
||||||
|
('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
|
||||||
|
('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
|
||||||
# obsolete
|
# obsolete
|
||||||
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||||
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||||
@ -280,8 +309,9 @@ class TestPlayerInfo(unittest.TestCase):
|
|||||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||||
)
|
)
|
||||||
|
ie = YoutubeIE(FakeYDL({'cachedir': False}))
|
||||||
for player_url, expected_player_id in PLAYER_URLS:
|
for player_url, expected_player_id in PLAYER_URLS:
|
||||||
player_id = YoutubeIE._extract_player_info(player_url)
|
player_id = ie._extract_player_info(player_url)
|
||||||
self.assertEqual(player_id, expected_player_id)
|
self.assertEqual(player_id, expected_player_id)
|
||||||
|
|
||||||
|
|
||||||
@ -301,8 +331,8 @@ class TestSignature(unittest.TestCase):
|
|||||||
def t_factory(name, sig_func, url_pattern):
|
def t_factory(name, sig_func, url_pattern):
|
||||||
def make_tfunc(url, sig_input, expected_sig):
|
def make_tfunc(url, sig_input, expected_sig):
|
||||||
m = url_pattern.match(url)
|
m = url_pattern.match(url)
|
||||||
assert m, '%r should follow URL format' % url
|
assert m, '{0!r} should follow URL format'.format(url)
|
||||||
test_id = m.group('id')
|
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
|
||||||
|
|
||||||
def test_func(self):
|
def test_func(self):
|
||||||
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
||||||
@ -335,12 +365,16 @@ def n_sig(jscode, sig_input):
|
|||||||
|
|
||||||
|
|
||||||
make_sig_test = t_factory(
|
make_sig_test = t_factory(
|
||||||
'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
|
'signature', signature,
|
||||||
|
re.compile(r'''(?x)
|
||||||
|
.+/(?P<h5>html5)?player(?(h5)(?:-en_US)?-|/)(?P<id>[a-zA-Z0-9/._-]+)
|
||||||
|
(?(h5)/(?:watch_as3|html5player))?\.js$
|
||||||
|
'''))
|
||||||
for test_spec in _SIG_TESTS:
|
for test_spec in _SIG_TESTS:
|
||||||
make_sig_test(*test_spec)
|
make_sig_test(*test_spec)
|
||||||
|
|
||||||
make_nsig_test = t_factory(
|
make_nsig_test = t_factory(
|
||||||
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
|
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
|
||||||
for test_spec in _NSIG_TESTS:
|
for test_spec in _NSIG_TESTS:
|
||||||
make_nsig_test(*test_spec)
|
make_nsig_test(*test_spec)
|
||||||
|
|
||||||
|
@ -1912,25 +1912,49 @@ class YoutubeDL(object):
|
|||||||
return subs
|
return subs
|
||||||
|
|
||||||
def __forced_printings(self, info_dict, filename, incomplete):
|
def __forced_printings(self, info_dict, filename, incomplete):
|
||||||
|
FIELD_ALIASES = {}
|
||||||
|
|
||||||
def print_mandatory(field):
|
def print_mandatory(field):
|
||||||
|
actual_field = FIELD_ALIASES.get(field, field)
|
||||||
if (self.params.get('force%s' % field, False)
|
if (self.params.get('force%s' % field, False)
|
||||||
and (not incomplete or info_dict.get(field) is not None)):
|
and (not incomplete or info_dict.get(actual_field) is not None)):
|
||||||
self.to_stdout(info_dict[field])
|
self.to_stdout(info_dict[actual_field])
|
||||||
|
|
||||||
def print_optional(field):
|
def print_optional(field):
|
||||||
if (self.params.get('force%s' % field, False)
|
if (self.params.get('force%s' % field, False)
|
||||||
and info_dict.get(field) is not None):
|
and info_dict.get(field) is not None):
|
||||||
self.to_stdout(info_dict[field])
|
self.to_stdout(info_dict[field])
|
||||||
|
|
||||||
|
info_dict = info_dict.copy()
|
||||||
|
info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
||||||
|
formatSeconds(info_dict['duration'])
|
||||||
|
if info_dict.get('duration', None) is not None
|
||||||
|
else None)
|
||||||
|
if info_dict.get('resolution') is None:
|
||||||
|
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
|
||||||
|
if filename is not None:
|
||||||
|
info_dict['filename'] = filename
|
||||||
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
# For RTMP URLs, also include the playpath
|
||||||
|
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
||||||
|
elif 'url' in info_dict:
|
||||||
|
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||||
|
if 'urls' in info_dict:
|
||||||
|
FIELD_ALIASES['url'] = 'urls'
|
||||||
|
|
||||||
|
for tmpl in self.params.get('forceprint', []):
|
||||||
|
if re.match(r'\w+$', tmpl):
|
||||||
|
tmpl = '%({0})s'.format(tmpl)
|
||||||
|
try:
|
||||||
|
out_txt = tmpl % info_dict
|
||||||
|
except KeyError:
|
||||||
|
self.report_warning('Skipping invalid print string "%s"' % (tmpl, ))
|
||||||
|
continue
|
||||||
|
self.to_stdout(out_txt)
|
||||||
|
|
||||||
print_mandatory('title')
|
print_mandatory('title')
|
||||||
print_mandatory('id')
|
print_mandatory('id')
|
||||||
if self.params.get('forceurl', False) and not incomplete:
|
print_mandatory('url')
|
||||||
if info_dict.get('requested_formats') is not None:
|
|
||||||
for f in info_dict['requested_formats']:
|
|
||||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
|
||||||
else:
|
|
||||||
# For RTMP URLs, also include the playpath
|
|
||||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
|
||||||
print_optional('thumbnail')
|
print_optional('thumbnail')
|
||||||
print_optional('description')
|
print_optional('description')
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
@ -1938,6 +1962,7 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
print_mandatory('format')
|
print_mandatory('format')
|
||||||
|
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||||
|
|
||||||
|
@ -245,7 +245,7 @@ def _real_main(argv=None):
|
|||||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
' template'.format(outtmpl))
|
' template'.format(outtmpl))
|
||||||
|
|
||||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
any_getting = opts.print_ or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||||
any_printing = opts.print_json
|
any_printing = opts.print_json
|
||||||
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||||
|
|
||||||
@ -335,6 +335,7 @@ def _real_main(argv=None):
|
|||||||
'forceduration': opts.getduration,
|
'forceduration': opts.getduration,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
|
'forceprint': opts.print_,
|
||||||
'forcejson': opts.dumpjson or opts.print_json,
|
'forcejson': opts.dumpjson or opts.print_json,
|
||||||
'dump_single_json': opts.dump_single_json,
|
'dump_single_json': opts.dump_single_json,
|
||||||
'simulate': opts.simulate or any_getting,
|
'simulate': opts.simulate or any_getting,
|
||||||
|
@ -692,9 +692,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'invidious': '|'.join(_INVIDIOUS_SITES),
|
'invidious': '|'.join(_INVIDIOUS_SITES),
|
||||||
}
|
}
|
||||||
_PLAYER_INFO_RE = (
|
_PLAYER_INFO_RE = (
|
||||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
|
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
|
||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
|
||||||
)
|
)
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||||
|
|
||||||
@ -1626,15 +1626,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
""" Return a string representation of a signature """
|
""" Return a string representation of a signature """
|
||||||
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||||
|
|
||||||
@classmethod
|
def _extract_player_info(self, player_url):
|
||||||
def _extract_player_info(cls, player_url):
|
try:
|
||||||
for player_re in cls._PLAYER_INFO_RE:
|
return self._search_regex(
|
||||||
id_m = re.search(player_re, player_url)
|
self._PLAYER_INFO_RE, player_url, 'player info', group='id')
|
||||||
if id_m:
|
except ExtractorError as e:
|
||||||
break
|
raise ExtractorError(
|
||||||
else:
|
'Cannot identify player %r' % (player_url,), cause=e)
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
|
||||||
return id_m.group('id')
|
|
||||||
|
|
||||||
def _load_player(self, video_id, player_url, fatal=True, player_id=None):
|
def _load_player(self, video_id, player_url, fatal=True, player_id=None):
|
||||||
if not player_id:
|
if not player_id:
|
||||||
@ -1711,6 +1709,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen('Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
|
def _extract_sig_fn(self, jsi, funcname):
|
||||||
|
var_ay = self._search_regex(
|
||||||
|
r'''(?x)
|
||||||
|
(?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
|
||||||
|
(var\s*[\w$]+\s*=\s*(?:
|
||||||
|
('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
|
||||||
|
\[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
|
||||||
|
))(?=\s*[,;])
|
||||||
|
''', jsi.code, 'useful values', default='')
|
||||||
|
|
||||||
|
sig_fn = jsi.extract_function_code(funcname)
|
||||||
|
|
||||||
|
if var_ay:
|
||||||
|
sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))
|
||||||
|
|
||||||
|
return sig_fn
|
||||||
|
|
||||||
def _parse_sig_js(self, jscode):
|
def _parse_sig_js(self, jscode):
|
||||||
# Examples where `sig` is funcname:
|
# Examples where `sig` is funcname:
|
||||||
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||||||
@ -1736,8 +1751,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
jscode, 'Initial JS player signature function name', group='sig')
|
jscode, 'Initial JS player signature function name', group='sig')
|
||||||
|
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
initial_function = jsi.extract_function(funcname)
|
|
||||||
return lambda s: initial_function([s])
|
initial_function = self._extract_sig_fn(jsi, funcname)
|
||||||
|
|
||||||
|
func = jsi.extract_function_from_code(*initial_function)
|
||||||
|
|
||||||
|
return lambda s: func([s])
|
||||||
|
|
||||||
def _cached(self, func, *cache_id):
|
def _cached(self, func, *cache_id):
|
||||||
def inner(*args, **kwargs):
|
def inner(*args, **kwargs):
|
||||||
@ -1856,15 +1875,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
|
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
|
||||||
|
|
||||||
var_ay = self._search_regex(
|
|
||||||
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
|
|
||||||
jsi.code, 'useful values', default='')
|
|
||||||
|
|
||||||
func_name = self._extract_n_function_name(jsi.code)
|
func_name = self._extract_n_function_name(jsi.code)
|
||||||
|
|
||||||
func_code = jsi.extract_function_code(func_name)
|
func_code = self._extract_sig_fn(jsi, func_name)
|
||||||
if var_ay:
|
|
||||||
func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
|
|
||||||
|
|
||||||
if player_id:
|
if player_id:
|
||||||
self.cache.store('youtube-nsig', player_id, func_code)
|
self.cache.store('youtube-nsig', player_id, func_code)
|
||||||
@ -2136,7 +2149,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_details = merge_dicts(*traverse_obj(
|
video_details = merge_dicts(*traverse_obj(
|
||||||
(player_response, api_player_response),
|
(player_response, api_player_response),
|
||||||
(Ellipsis, 'videoDetails', T(dict))))
|
(Ellipsis, 'videoDetails', T(dict))))
|
||||||
player_response.update(api_player_response or {})
|
player_response.update(filter_dict(
|
||||||
|
api_player_response or {}, cndn=lambda k, _: k != 'captions'))
|
||||||
player_response['videoDetails'] = video_details
|
player_response['videoDetails'] = video_details
|
||||||
|
|
||||||
def is_agegated(playability):
|
def is_agegated(playability):
|
||||||
@ -2566,8 +2580,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
pctr = traverse_obj(
|
pctr = traverse_obj(
|
||||||
player_response,
|
(player_response, api_player_response),
|
||||||
('captions', 'playerCaptionsTracklistRenderer', T(dict)))
|
(Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
|
||||||
if pctr:
|
if pctr:
|
||||||
def process_language(container, base_url, lang_code, query):
|
def process_language(container, base_url, lang_code, query):
|
||||||
lang_subs = []
|
lang_subs = []
|
||||||
@ -2584,20 +2598,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def process_subtitles():
|
def process_subtitles():
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for caption_track in traverse_obj(pctr, (
|
for caption_track in traverse_obj(pctr, (
|
||||||
'captionTracks', lambda _, v: v.get('baseUrl'))):
|
Ellipsis, 'captionTracks', lambda _, v: (
|
||||||
|
v.get('baseUrl') and v.get('languageCode')))):
|
||||||
base_url = self._yt_urljoin(caption_track['baseUrl'])
|
base_url = self._yt_urljoin(caption_track['baseUrl'])
|
||||||
if not base_url:
|
if not base_url:
|
||||||
continue
|
continue
|
||||||
|
lang_code = caption_track['languageCode']
|
||||||
if caption_track.get('kind') != 'asr':
|
if caption_track.get('kind') != 'asr':
|
||||||
lang_code = caption_track.get('languageCode')
|
|
||||||
if not lang_code:
|
|
||||||
continue
|
|
||||||
process_language(
|
process_language(
|
||||||
subtitles, base_url, lang_code, {})
|
subtitles, base_url, lang_code, {})
|
||||||
continue
|
continue
|
||||||
automatic_captions = {}
|
automatic_captions = {}
|
||||||
|
process_language(
|
||||||
|
automatic_captions, base_url, lang_code, {})
|
||||||
for translation_language in traverse_obj(pctr, (
|
for translation_language in traverse_obj(pctr, (
|
||||||
'translationLanguages', lambda _, v: v.get('languageCode'))):
|
Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
|
||||||
translation_language_code = translation_language['languageCode']
|
translation_language_code = translation_language['languageCode']
|
||||||
process_language(
|
process_language(
|
||||||
automatic_captions, base_url, translation_language_code,
|
automatic_captions, base_url, translation_language_code,
|
||||||
|
@ -678,7 +678,7 @@ class JSInterpreter(object):
|
|||||||
return len(obj)
|
return len(obj)
|
||||||
try:
|
try:
|
||||||
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
||||||
except (TypeError, KeyError, IndexError) as e:
|
except (TypeError, KeyError, IndexError, ValueError) as e:
|
||||||
# allow_undefined is None gives correct behaviour
|
# allow_undefined is None gives correct behaviour
|
||||||
if allow_undefined or (
|
if allow_undefined or (
|
||||||
allow_undefined is None and not isinstance(e, TypeError)):
|
allow_undefined is None and not isinstance(e, TypeError)):
|
||||||
@ -1038,6 +1038,10 @@ class JSInterpreter(object):
|
|||||||
left_val = self._index(left_val, idx)
|
left_val = self._index(left_val, idx)
|
||||||
if isinstance(idx, float):
|
if isinstance(idx, float):
|
||||||
idx = int(idx)
|
idx = int(idx)
|
||||||
|
if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
|
||||||
|
# JS Array is a sparsely assignable list
|
||||||
|
# TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
|
||||||
|
left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
|
||||||
left_val[idx] = self._operator(
|
left_val[idx] = self._operator(
|
||||||
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
||||||
m.group('expr'), expr, local_vars, allow_recursion)
|
m.group('expr'), expr, local_vars, allow_recursion)
|
||||||
@ -1204,7 +1208,8 @@ class JSInterpreter(object):
|
|||||||
elif member == 'join':
|
elif member == 'join':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(len(argvals) <= 1, 'takes at most one argument')
|
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||||
return (',' if len(argvals) == 0 else argvals[0]).join(
|
return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
|
||||||
|
else argvals[0]).join(
|
||||||
('' if x in (None, JS_Undefined) else _js_toString(x))
|
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||||
for x in obj)
|
for x in obj)
|
||||||
elif member == 'reverse':
|
elif member == 'reverse':
|
||||||
@ -1364,19 +1369,21 @@ class JSInterpreter(object):
|
|||||||
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
||||||
return self.build_arglist(func_m.group('args')), code
|
return self.build_arglist(func_m.group('args')), code
|
||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname, *global_stack):
|
||||||
return function_with_repr(
|
return function_with_repr(
|
||||||
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
self.extract_function_from_code(*itertools.chain(
|
||||||
|
self.extract_function_code(funcname), global_stack)),
|
||||||
'F<%s>' % (funcname,))
|
'F<%s>' % (funcname,))
|
||||||
|
|
||||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
local_vars = {}
|
local_vars = {}
|
||||||
|
|
||||||
|
start = None
|
||||||
while True:
|
while True:
|
||||||
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
break
|
break
|
||||||
start, body_start = mobj.span()
|
start, body_start = ((start or 0) + x for x in mobj.span())
|
||||||
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||||
[x.strip() for x in mobj.group('args').split(',')],
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
|
@ -13,6 +13,7 @@ from .compat import (
|
|||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
compat_open as open,
|
compat_open as open,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
@ -109,6 +110,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||||
setattr(parser.values, option.dest, value.split(','))
|
setattr(parser.values, option.dest, value.split(','))
|
||||||
|
|
||||||
|
def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=compat_str.strip):
|
||||||
|
# append can be True, False or -1 (prepend)
|
||||||
|
current = list(getattr(parser.values, option.dest)) if append else []
|
||||||
|
value = list(filter(None, [process(value)] if delim is None else map(process, value.split(delim))))
|
||||||
|
setattr(
|
||||||
|
parser.values, option.dest,
|
||||||
|
current + value if append is True else value + current)
|
||||||
|
|
||||||
# No need to wrap help messages if we're on a wide console
|
# No need to wrap help messages if we're on a wide console
|
||||||
columns = compat_get_terminal_size().columns
|
columns = compat_get_terminal_size().columns
|
||||||
max_width = columns if columns else 80
|
max_width = columns if columns else 80
|
||||||
@ -594,6 +603,13 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--skip-download',
|
'--skip-download',
|
||||||
action='store_true', dest='skip_download', default=False,
|
action='store_true', dest='skip_download', default=False,
|
||||||
help='Do not download the video')
|
help='Do not download the video')
|
||||||
|
verbosity.add_option(
|
||||||
|
'-O', '--print', metavar='TEMPLATE',
|
||||||
|
action='callback', dest='print_', type='str', default=[],
|
||||||
|
callback=_list_from_options_callback, callback_kwargs={'delim': None},
|
||||||
|
help=(
|
||||||
|
'Simulate, quiet but print the given fields. Either a field name '
|
||||||
|
'or similar formatting as the output template can be used'))
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'-g', '--get-url',
|
'-g', '--get-url',
|
||||||
action='store_true', dest='geturl', default=False,
|
action='store_true', dest='geturl', default=False,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user