[AENetworksBaseIE] Report missing show data instead of crash

[devscripts] Add a hack to convert command-line options to API options
[core] Handle `/../` sequences in HTTP URLs
2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00
8 changed files with 165 additions and 43 deletions
--- a/devscripts/cli_to_api.py
+++ b/devscripts/cli_to_api.py
@ -0,0 +1,64 @@
 #!/usr/bin/env python
 # coding: utf-8
 from __future__ import unicode_literals
 """
 This script displays the API parameters corresponding to a yt-dl command line
 Example:
 $ ./cli_to_api.py -f best
 {u'format': 'best'}
 $
 """
 # Allow direct execution
 import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import youtube_dl
 from types import MethodType
 def cli_to_api(*opts):
    """Return the API options that the command-line arguments `opts` change.

    Each argument list is run through youtube_dl._real_main() with the
    YoutubeDL constructor temporarily replaced by one that aborts and hands
    back the option dict it was given.  The dict obtained for `opts` is then
    compared with the one obtained for an empty command line.

    opts -- command-line arguments, one string per argument

    Returns a dict holding only the keys whose value differs from the
    default; a 'postprocessors' entry is reduced to the items that are not
    already among the default postprocessors.
    """
    YDL = youtube_dl.YoutubeDL
    # to extract the parsed options, break out of YoutubeDL instantiation
    # return options via this Exception
    class ParseYTDLResult(Exception):
        def __init__(self, result):
            super(ParseYTDLResult, self).__init__('result')
            self.opts = result
    # replacement constructor that raises ParseYTDLResult
    def ytdl_init(ydl, ydl_opts):
        super(YDL, ydl).__init__(ydl_opts)
        raise ParseYTDLResult(ydl_opts)
    # core parser
    def parsed_options(argv):
        try:
            youtube_dl._real_main(list(argv))
        except ParseYTDLResult as result:
            return result.opts
    # remember what the class itself defines so that the patch can be undone
    # and YoutubeDL remains usable by the caller afterwards
    not_defined = object()
    saved_init = vars(YDL).get('__init__', not_defined)
    # patch in the constructor
    YDL.__init__ = MethodType(ytdl_init, YDL)
    try:
        # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
        default = parsed_options([])
        requested = parsed_options(opts)
    finally:
        # undo the patch even if parsing exits or raises something else
        if saved_init is not_defined:
            del YDL.__init__
        else:
            YDL.__init__ = saved_init
    diff = dict((k, v) for k, v in requested.items() if default[k] != v)
    if 'postprocessors' in diff:
        diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
    return diff
 def main():
    """Pretty-print the API options matching this script's command line."""
    from pprint import pprint
    # sys.argv[0] is the path of this script, not an argument to translate,
    # so only the arguments that follow it are passed on
    pprint(cli_to_api(*sys.argv[1:]))
 if __name__ == '__main__':
    main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -39,6 +39,7 @@ from .compat import (
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
 )
@ -60,6 +61,7 @@ from .utils import (
    format_bytes,
    formatSeconds,
    GeoRestrictedError,
    HEADRequest,
    int_or_none,
    ISO3166Utils,
    locked_file,
@ -74,6 +76,7 @@ from .utils import (
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
    PUTRequest,
    register_socks_protocols,
    render_table,
    replace_extension,
@ -2297,6 +2300,27 @@ class YoutubeDL(object):
        """ Start an HTTP download """
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
        # an embedded /../ sequence is not automatically handled by urllib2
        # see https://github.com/yt-dlp/yt-dlp/issues/3355
        url = req.get_full_url()
        parts = url.partition('/../')
        if parts[1]:
            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
        if url:
            # worse, URL path may have initial /../ against RFCs: work-around
            # by stripping such prefixes, like eg Firefox
            parts = compat_urllib_parse.urlsplit(url)
            path = parts.path
            while path.startswith('/../'):
                path = path[3:]
            url = parts._replace(path=path).geturl()
            # get a new Request with the munged URL
            if url != req.get_full_url():
                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
                    req.get_method(), compat_urllib_request.Request)
                req = req_type(
                    url, data=req.data, headers=dict(req.header_items()),
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
        return self._opener.open(req, timeout=self._socket_timeout)
    def print_debug_header(self):
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -88,17 +88,21 @@ class FileDownloader(object):
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)
-    @staticmethod
+    @classmethod
-    def calc_eta(start, now, total, current):
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
        if len(args) < 2:
            rate, remaining = (start_or_rate, now_or_remaining)
            if None in (rate, remaining):
                return None
            return int(float(remaining) / rate)
        start, now = (start_or_rate, now_or_remaining)
        total, current = args
        if total is None:
            return None
        if now is None:
            now = time.time()
-        dif = now - start
+        rate = cls.calc_speed(start, now, current)
-        if current == 0 or dif < 0.001:  # One millisecond
+        return rate and int((float(total) - float(current)) / rate)
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)
    @staticmethod
    def format_eta(eta):
@ -123,6 +127,12 @@ class FileDownloader(object):
    def format_retries(retries):
        return 'inf' if retries == float('inf') else '%.0f' % retries
    @staticmethod
    def filesize_or_none(unencoded_filename):
        """Return the size of the named file, or None when there is no such file.

        The name is converted with encodeFilename() first; the size comes from
        os.path.getsize() and is only looked up if os.path.isfile() accepts
        the converted name.
        """
        encoded_name = encodeFilename(unencoded_filename)
        return os.path.getsize(encoded_name) if os.path.isfile(encoded_name) else None
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@ -38,8 +38,7 @@ class DashSegmentsFD(FragmentFD):
            # In DASH, the first segment contains necessary headers to
            # generate a valid MP4 file, so always abort for the first segment
            fatal = i == 0 or not skip_unavailable_fragments
-            count = 0
+            for count in range(fragment_retries + 1):
            while count <= fragment_retries:
                try:
                    fragment_url = fragment.get('url')
                    if not fragment_url:
@ -57,9 +56,8 @@ class DashSegmentsFD(FragmentFD):
                    # is usually enough) thus allowing to download the whole file successfully.
                    # To be future-proof we will retry all fragments that fail with any
                    # HTTP error.
-                    count += 1
+                    if count < fragment_retries:
-                    if count <= fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
                except DownloadError:
                    # Don't retry fragment if error occurred during HTTP downloading
                    # itself since it has own retry settings
@ -68,7 +66,7 @@ class DashSegmentsFD(FragmentFD):
                        break
                    raise
-            if count > fragment_retries:
+            if count >= fragment_retries:
                if not fatal:
                    self.report_skip_fragment(frag_index)
                    continue
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@ -273,7 +273,7 @@ class HttpieFD(ExternalFD):
 class FFmpegFD(ExternalFD):
    @classmethod
    def supports(cls, info_dict):
-        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
    @classmethod
    def available(cls):
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
    @staticmethod
    def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+        return ctx['live'] is not True and ctx['tmpfilename'] != '-'
    def _read_ytdl_file(self, ctx):
        assert 'ytdl_corrupt' not in ctx
@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        }
        frag_resume_len = 0
        if ctx['dl'].params.get('continuedl', True):
            frag_resume_len = self.filesize_or_none(
                self.temp_name(fragment_filename))
        fragment_info_dict['frag_resume_len'] = frag_resume_len
        ctx['frag_resume_len'] = frag_resume_len or 0
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False, None
@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
            del ctx['fragment_filename_sanitized']
    def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
+        if not ctx.setdefault('live', False):
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        continuedl = self.params.get('continuedl', True)
        dl = HttpQuietDownloader(
            self.ydl,
            {
-                'continuedl': True,
+                'continuedl': continuedl,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0
        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename) or 0
        if resume_len > 0:
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        # Should be initialized before ytdl file check
        ctx.update({
@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
        })
        if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
            if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                if not continuedl:
                    if ytdl_file_exists:
                        self._read_ytdl_file(ctx)
                    ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0
@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
        start = time.time()
        ctx.update({
            'started': start,
            'fragment_started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
            if s['status'] not in ('downloading', 'finished'):
                return
            if not total_frags and ctx.get('fragment_count'):
                state['fragment_count'] = ctx['fragment_count']
            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
                ctx['fragment_index'] = state['fragment_index']
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_total_bytes)
                ctx['fragment_started'] = time.time()
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['speed'] = state['speed'] = self.calc_speed(
                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
                if not ctx['live']:
-                    state['eta'] = self.calc_eta(
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                        start, time_now, estimated_size - resume_len,
                        state['downloaded_bytes'] - resume_len)
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)
@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
                        os.utime(ctx['filename'], (time.time(), filetime))
                    except Exception:
                        pass
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
        if self.params.get('continuedl', True):
            # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+            ctx.resume_len = info_dict.get('frag_resume_len')
-                ctx.resume_len = os.path.getsize(
+            if ctx.resume_len is None:
-                    encodeFilename(ctx.tmpfilename))
+                ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
        ctx.is_resume = ctx.resume_len > 0
@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
                        raise RetryDownload(err)
                    raise err
                # When trying to resume, Content-Range HTTP header of response has to be checked
-                # to match the value of requested Range HTTP header. This is due to a webservers
+                # to match the value of requested Range HTTP header. This is due to webservers
                # that don't support resuming and serve a whole file with no Content-Range
-                # set in response despite of requested Range (see
+                # set in response despite requested Range (see
                # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                if has_range:
                    content_range = ctx.data.headers.get('Content-Range')
@ -293,10 +293,7 @@ class HttpFD(FileDownloader):
                # Progress message
                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if ctx.data_len is None:
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
                    eta = None
                else:
                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
                self._hook_progress({
                    'status': 'downloading',
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@ -8,6 +8,8 @@ from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
    remove_start,
    traverse_obj,
    update_url_query,
    urlencode_postdata,
 )
@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
    }
    def _extract_aen_smil(self, smil_url, video_id, auth=None):
-        query = {'mbr': 'true'}
+        query = {
            'mbr': 'true',
            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
        }
        if auth:
            query['auth'] = auth
        TP_SMIL_QUERY = [{
            'assetTypes': 'high_video_ak',
-            'switch': 'hls_high_ak'
+            'switch': 'hls_high_ak',
        }, {
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_s3',
        }, {
            'assetTypes': 'high_video_s3',
            'switch': 'hls_high_fastly',
@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
        requestor_id, brand = self._DOMAIN_MAP[domain]
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
        result = traverse_obj(
            result, ('results',
                     lambda k, v: k == 0 and v[filter_key] == filter_value),
            get_all=False)
        if not result:
            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
                                 video_id=remove_start(filter_value, '/'))
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
Author	SHA1	Message	Date
dirkf	6fece0a96b	[AENetworksBaseIE] Report missing show data instead of crash	2023-03-14 16:23:20 +00:00
dirkf	70ff013910	[devscripts] Add a hack to convert command-line options to API options	2023-03-14 16:23:20 +00:00
dirkf	e8de54bce5	[core] Handle `/../` sequences in HTTP URLs * use Python's RFC implementation for embedded sequences * hack: strip unbalanced leading `../` from path, like eg Firefox See https://github.com/yt-dlp/yt-dlp/issues/3355	2023-03-14 16:23:20 +00:00
dirkf	baa6c5e95c	[FragmentFD] Respect `--no-continue` * discard partial fragment on `--no-continue` * continue with correct progress display otherwise Resolves #21467	2023-03-14 16:23:20 +00:00
dirkf	5c985d4f81	[downloader] Let _ffmpeg_ handle DASH segments Fixes https://github.com/ytdl-org/youtube-dl/issues/31792 after `3da1783`.	2023-03-14 16:23:20 +00:00