Compare commits

...

5 Commits

Author SHA1 Message Date
dirkf 6fece0a96b [AENetworksBaseIE] Report missing show data instead of crash 2023-03-14 16:23:20 +00:00
dirkf 70ff013910 [devscripts] Add a hack to convert command-line options to API options 2023-03-14 16:23:20 +00:00
dirkf e8de54bce5 [core] Handle `/../` sequences in HTTP URLs
* use Python's RFC implementation for embedded sequences
* hack: strip unbalanced leading `../` from path, like eg Firefox

See https://github.com/yt-dlp/yt-dlp/issues/3355
2023-03-14 16:23:20 +00:00
dirkf baa6c5e95c [FragmentFD] Respect `--no-continue`
* discard partial fragment on `--no-continue`
* continue with correct progress display otherwise

Resolves #21467
2023-03-14 16:23:20 +00:00
dirkf 5c985d4f81 [downloader] Let _ffmpeg_ handle DASH segments
Fixes https://github.com/ytdl-org/youtube-dl/issues/31792 after 3da1783.
2023-03-14 16:23:20 +00:00
8 changed files with 165 additions and 43 deletions

64
devscripts/cli_to_api.py Executable file
View File

@ -0,0 +1,64 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
"""
This script displays the API parameters corresponding to a yt-dl command line
Example:
$ ./cli_to_api.py -f best
{u'format': 'best'}
$
"""
# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import youtube_dl
from types import MethodType
def cli_to_api(*opts):
YDL = youtube_dl.YoutubeDL
# to extract the parsed options, break out of YoutubeDL instantiation
# return options via this Exception
class ParseYTDLResult(Exception):
def __init__(self, result):
super(ParseYTDLResult, self).__init__('result')
self.opts = result
# replacement constructor that raises ParseYTDLResult
def ytdl_init(ydl, ydl_opts):
super(YDL, ydl).__init__(ydl_opts)
raise ParseYTDLResult(ydl_opts)
# patch in the constructor
YDL.__init__ = MethodType(ytdl_init, YDL)
# core parser
def parsed_options(argv):
try:
youtube_dl._real_main(list(argv))
except ParseYTDLResult as result:
return result.opts
# from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
default = parsed_options([])
diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
if 'postprocessors' in diff:
diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
return diff
def main():
from pprint import pprint
pprint(cli_to_api(*sys.argv))
if __name__ == '__main__':
main()

View File

@ -39,6 +39,7 @@ from .compat import (
compat_str, compat_str,
compat_tokenize_tokenize, compat_tokenize_tokenize,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_urllib_request_DataHandler, compat_urllib_request_DataHandler,
) )
@ -60,6 +61,7 @@ from .utils import (
format_bytes, format_bytes,
formatSeconds, formatSeconds,
GeoRestrictedError, GeoRestrictedError,
HEADRequest,
int_or_none, int_or_none,
ISO3166Utils, ISO3166Utils,
locked_file, locked_file,
@ -74,6 +76,7 @@ from .utils import (
preferredencoding, preferredencoding,
prepend_extension, prepend_extension,
process_communicate_or_kill, process_communicate_or_kill,
PUTRequest,
register_socks_protocols, register_socks_protocols,
render_table, render_table,
replace_extension, replace_extension,
@ -2297,6 +2300,27 @@ class YoutubeDL(object):
""" Start an HTTP download """ """ Start an HTTP download """
if isinstance(req, compat_basestring): if isinstance(req, compat_basestring):
req = sanitized_Request(req) req = sanitized_Request(req)
# an embedded /../ sequence is not automatically handled by urllib2
# see https://github.com/yt-dlp/yt-dlp/issues/3355
url = req.get_full_url()
parts = url.partition('/../')
if parts[1]:
url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
if url:
# worse, URL path may have initial /../ against RFCs: work-around
# by stripping such prefixes, like eg Firefox
parts = compat_urllib_parse.urlsplit(url)
path = parts.path
while path.startswith('/../'):
path = path[3:]
url = parts._replace(path=path).geturl()
# get a new Request with the munged URL
if url != req.get_full_url():
req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
req.get_method(), compat_urllib_request.Request)
req = req_type(
url, data=req.data, headers=dict(req.header_items()),
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
return self._opener.open(req, timeout=self._socket_timeout) return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self): def print_debug_header(self):

View File

@ -88,17 +88,21 @@ class FileDownloader(object):
return '---.-%' return '---.-%'
return '%6s' % ('%3.1f%%' % percent) return '%6s' % ('%3.1f%%' % percent)
@staticmethod @classmethod
def calc_eta(start, now, total, current): def calc_eta(cls, start_or_rate, now_or_remaining, *args):
if len(args) < 2:
rate, remaining = (start_or_rate, now_or_remaining)
if None in (rate, remaining):
return None
return int(float(remaining) / rate)
start, now = (start_or_rate, now_or_remaining)
total, current = args
if total is None: if total is None:
return None return None
if now is None: if now is None:
now = time.time() now = time.time()
dif = now - start rate = cls.calc_speed(start, now, current)
if current == 0 or dif < 0.001: # One millisecond return rate and int((float(total) - float(current)) / rate)
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod @staticmethod
def format_eta(eta): def format_eta(eta):
@ -123,6 +127,12 @@ class FileDownloader(object):
def format_retries(retries): def format_retries(retries):
return 'inf' if retries == float('inf') else '%.0f' % retries return 'inf' if retries == float('inf') else '%.0f' % retries
@staticmethod
def filesize_or_none(unencoded_filename):
fn = encodeFilename(unencoded_filename)
if os.path.isfile(fn):
return os.path.getsize(fn)
@staticmethod @staticmethod
def best_block_size(elapsed_time, bytes): def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0) new_min = max(bytes / 2.0, 1.0)

View File

@ -38,8 +38,7 @@ class DashSegmentsFD(FragmentFD):
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = i == 0 or not skip_unavailable_fragments fatal = i == 0 or not skip_unavailable_fragments
count = 0 for count in range(fragment_retries + 1):
while count <= fragment_retries:
try: try:
fragment_url = fragment.get('url') fragment_url = fragment.get('url')
if not fragment_url: if not fragment_url:
@ -57,9 +56,8 @@ class DashSegmentsFD(FragmentFD):
# is usually enough) thus allowing to download the whole file successfully. # is usually enough) thus allowing to download the whole file successfully.
# To be future-proof we will retry all fragments that fail with any # To be future-proof we will retry all fragments that fail with any
# HTTP error. # HTTP error.
count += 1 if count < fragment_retries:
if count <= fragment_retries: self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
self.report_retry_fragment(err, frag_index, count, fragment_retries)
except DownloadError: except DownloadError:
# Don't retry fragment if error occurred during HTTP downloading # Don't retry fragment if error occurred during HTTP downloading
# itself since it has own retry settings # itself since it has own retry settings
@ -68,7 +66,7 @@ class DashSegmentsFD(FragmentFD):
break break
raise raise
if count > fragment_retries: if count >= fragment_retries:
if not fatal: if not fatal:
self.report_skip_fragment(frag_index) self.report_skip_fragment(frag_index)
continue continue

View File

@ -273,7 +273,7 @@ class HttpieFD(ExternalFD):
class FFmpegFD(ExternalFD): class FFmpegFD(ExternalFD):
@classmethod @classmethod
def supports(cls, info_dict): def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
@classmethod @classmethod
def available(cls): def available(cls):

View File

@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
@staticmethod @staticmethod
def __do_ytdl_file(ctx): def __do_ytdl_file(ctx):
return not ctx['live'] and not ctx['tmpfilename'] == '-' return ctx['live'] is not True and ctx['tmpfilename'] != '-'
def _read_ytdl_file(self, ctx): def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx assert 'ytdl_corrupt' not in ctx
@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
'url': frag_url, 'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'), 'http_headers': headers or info_dict.get('http_headers'),
} }
frag_resume_len = 0
if ctx['dl'].params.get('continuedl', True):
frag_resume_len = self.filesize_or_none(
self.temp_name(fragment_filename))
fragment_info_dict['frag_resume_len'] = frag_resume_len
ctx['frag_resume_len'] = frag_resume_len or 0
success = ctx['dl'].download(fragment_filename, fragment_info_dict) success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success: if not success:
return False, None return False, None
@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
del ctx['fragment_filename_sanitized'] del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
if 'live' not in ctx: if not ctx.setdefault('live', False):
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags'] total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0) ad_frags = ctx.get('ad_frags', 0)
if ad_frags: if ad_frags:
@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
self.to_screen( self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename']) self.report_destination(ctx['filename'])
continuedl = self.params.get('continuedl', True)
dl = HttpQuietDownloader( dl = HttpQuietDownloader(
self.ydl, self.ydl,
{ {
'continuedl': True, 'continuedl': continuedl,
'quiet': True, 'quiet': True,
'noprogress': True, 'noprogress': True,
'ratelimit': self.params.get('ratelimit'), 'ratelimit': self.params.get('ratelimit'),
@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
) )
tmpfilename = self.temp_name(ctx['filename']) tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb' open_mode = 'wb'
resume_len = 0
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)): resume_len = self.filesize_or_none(tmpfilename) or 0
if resume_len > 0:
open_mode = 'ab' open_mode = 'ab'
resume_len = os.path.getsize(encodeFilename(tmpfilename))
# Should be initialized before ytdl file check # Should be initialized before ytdl file check
ctx.update({ ctx.update({
@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
}) })
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx) self._read_ytdl_file(ctx)
is_corrupt = ctx.get('ytdl_corrupt') is True is_corrupt = ctx.get('ytdl_corrupt') is True
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
if 'ytdl_corrupt' in ctx: if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt'] del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
else: else:
if not continuedl:
if ytdl_file_exists:
self._read_ytdl_file(ctx)
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0 assert ctx['fragment_index'] == 0
@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
start = time.time() start = time.time()
ctx.update({ ctx.update({
'started': start, 'started': start,
'fragment_started': start,
# Amount of fragment's bytes downloaded by the time of the previous # Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation # frag progress hook invocation
'prev_frag_downloaded_bytes': 0, 'prev_frag_downloaded_bytes': 0,
@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'): if s['status'] not in ('downloading', 'finished'):
return return
if not total_frags and ctx.get('fragment_count'):
state['fragment_count'] = ctx['fragment_count']
time_now = time.time() time_now = time.time()
state['elapsed'] = time_now - start state['elapsed'] = time_now - start
frag_total_bytes = s.get('total_bytes') or 0 frag_total_bytes = s.get('total_bytes') or 0
@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
ctx['fragment_index'] = state['fragment_index'] ctx['fragment_index'] = state['fragment_index']
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
ctx['speed'] = state['speed'] = self.calc_speed(
ctx['fragment_started'], time_now, frag_total_bytes)
ctx['fragment_started'] = time.time()
ctx['prev_frag_downloaded_bytes'] = 0 ctx['prev_frag_downloaded_bytes'] = 0
else: else:
frag_downloaded_bytes = s['downloaded_bytes'] frag_downloaded_bytes = s['downloaded_bytes']
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
ctx['speed'] = state['speed'] = self.calc_speed(
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
if not ctx['live']: if not ctx['live']:
state['eta'] = self.calc_eta( state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
start, time_now, estimated_size - resume_len,
state['downloaded_bytes'] - resume_len)
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state) self._hook_progress(state)
@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
os.utime(ctx['filename'], (time.time(), filetime)) os.utime(ctx['filename'], (time.time(), filetime))
except Exception: except Exception:
pass pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
self._hook_progress({ self._hook_progress({
'downloaded_bytes': downloaded_bytes, 'downloaded_bytes': downloaded_bytes,

View File

@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): ctx.resume_len = info_dict.get('frag_resume_len')
ctx.resume_len = os.path.getsize( if ctx.resume_len is None:
encodeFilename(ctx.tmpfilename)) ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
ctx.is_resume = ctx.resume_len > 0 ctx.is_resume = ctx.resume_len > 0
@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
raise RetryDownload(err) raise RetryDownload(err)
raise err raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers # to match the value of requested Range HTTP header. This is due to webservers
# that don't support resuming and serve a whole file with no Content-Range # that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see # set in response despite requested Range (see
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range: if has_range:
content_range = ctx.data.headers.get('Content-Range') content_range = ctx.data.headers.get('Content-Range')
@ -293,10 +293,7 @@ class HttpFD(FileDownloader):
# Progress message # Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if ctx.data_len is None: eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
eta = None
else:
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({ self._hook_progress({
'status': 'downloading', 'status': 'downloading',

View File

@ -8,6 +8,8 @@ from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
remove_start,
traverse_obj,
update_url_query, update_url_query,
urlencode_postdata, urlencode_postdata,
) )
@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
} }
def _extract_aen_smil(self, smil_url, video_id, auth=None): def _extract_aen_smil(self, smil_url, video_id, auth=None):
query = {'mbr': 'true'} query = {
'mbr': 'true',
'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
}
if auth: if auth:
query['auth'] = auth query['auth'] = auth
TP_SMIL_QUERY = [{ TP_SMIL_QUERY = [{
'assetTypes': 'high_video_ak', 'assetTypes': 'high_video_ak',
'switch': 'hls_high_ak' 'switch': 'hls_high_ak',
}, { }, {
'assetTypes': 'high_video_s3' 'assetTypes': 'high_video_s3',
}, { }, {
'assetTypes': 'high_video_s3', 'assetTypes': 'high_video_s3',
'switch': 'hls_high_fastly', 'switch': 'hls_high_fastly',
@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
requestor_id, brand = self._DOMAIN_MAP[domain] requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json( result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand, 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0] filter_value, query={'filter[%s]' % filter_key: filter_value})
result = traverse_obj(
result, ('results',
lambda k, v: k == 0 and v[filter_key] == filter_value),
get_all=False)
if not result:
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
video_id=remove_start(filter_value, '/'))
title = result['title'] title = result['title']
video_id = result['id'] video_id = result['id']
media_url = result['publicUrl'] media_url = result['publicUrl']
@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'This video is only available for users of participating TV providers.', 'skip': 'Geo-restricted - This content is not available in your location.'
}, { }, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': { 'info_dict': {
@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'This video is only available for users of participating TV providers.',
}, { }, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True 'only_matching': True