From 14ef89a8dab4f6ba6185d6f5bf0317a705d7b842 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 1 Feb 2023 09:39:49 +0530
Subject: [PATCH 001/156] Support `if` statements

Fix for yt-dlp/yt-dlp#6131
Closes #31509
---
 test/test_jsinterp.py          | 32 ++++++++++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 ++++
 youtube_dl/jsinterp.py         | 21 ++++++++++++++++++---
 3 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 5121c8cf8..c47def737 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -158,6 +158,38 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('z'), 5)
         self.assertEqual(jsi.call_function('y'), 2)
 
+    def test_if(self):
+        jsi = JSInterpreter('''
+        function x() {
+            let a = 9;
+            if (0==0) {a++}
+            return a
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0==0) {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
     def test_for_loop(self):
         # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4e678cae0..ac37ffa45 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -135,6 +135,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
         'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
     ),
+    (
+        'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+        'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 530a705b4..9a3b8d7f2 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -214,7 +214,7 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
+                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     @classmethod
@@ -268,7 +268,7 @@ class JSInterpreter(object):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -301,7 +301,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -428,10 +428,25 @@ class JSInterpreter(object):
 
         m = re.match(r'''(?x)
                 (?P<try>try)\s*\{|
+                (?P<if>if)\s*\(|
                 (?P<switch>switch)\s*\(|
                 (?P<for>for)\s*\(
                 ''', expr)
         md = m.groupdict() if m else {}
+        if md.get('if'):
+            cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+            if_expr, expr = self._separate_at_paren(expr.lstrip())
+            # TODO: "else if" is not handled
+            else_expr = None
+            m = re.match(r'else\s*{', expr)
+            if m:
+                else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+            cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+            ret, should_abort = self.interpret_statement(
+                if_expr if cndn else else_expr, local_vars, allow_recursion)
+            if should_abort:
+                return ret, True
+
         if md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None

From 295736c9cba714fb5de7d1c3dd31d86e50091cf8 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 14:28:32 +0000
Subject: [PATCH 002/156] [jsinterp] Improve parsing

* support subset `... else if ...`
* support `while`
* add `RegExp` class
* generalise `new` support
* limited more debug strings
* matching test changes

---
 test/test_jsinterp.py  |  53 +++++++++++++-
 youtube_dl/jsinterp.py | 156 +++++++++++++++++++++++++++--------------
 2 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c47def737..b5962356c 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.compat import compat_re_Pattern
-
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
 
 
@@ -140,15 +138,23 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
+    def test_Date(self):
         jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
         self.assertEqual(jsi.call_function('x'), 86000)
+
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
+        # date format m/d/y
+        jsi = JSInterpreter('''
+        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
@@ -181,6 +187,15 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('x'), 10)
 
         """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
+    def test_elseif(self):
         jsi = JSInterpreter('''
         function x() {
             if (0!=0) {return 1}
@@ -188,6 +203,16 @@ class TestJSInterpreter(unittest.TestCase):
             else {return 10}
         }''')
         self.assertEqual(jsi.call_function('x'), 10)
+
+        """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        # etc
         """
 
     def test_for_loop(self):
@@ -197,6 +222,13 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
+    def test_while_loop(self):
+        # function x() { a=0; while (a<10) {a++} a }
+        jsi = JSInterpreter('''
+        function x() { a=0; while (a<10) {a++} return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
     def test_switch(self):
         jsi = JSInterpreter('''
         function x(f) { switch(f){
@@ -415,13 +447,28 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; return a; }
         ''')
-        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
+        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
+                     'groups', 'match', 'pattern', 'scanner',
+                     'search', 'split', 'sub', 'subn'))
+        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a=[/[)\\]/]; return a[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
+
+        """  # fails
+        jsi = JSInterpreter(r'''
+        function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+        """
+
     def test_char_code_at(self):
         jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
         self.assertEqual(jsi.call_function('x', 0), 116)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 9a3b8d7f2..1e7b342ac 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -187,19 +187,6 @@ class LocalNameSpace(ChainMap):
 class JSInterpreter(object):
     __named_object_counter = 0
 
-    _RE_FLAGS = {
-        # special knowledge: Python's re flags are bitmask values, current max 128
-        # invent new bitmask values well above that for literal parsing
-        # TODO: new pattern class to execute matches with these flags
-        'd': 1024,  # Generate indices for substring matches
-        'g': 2048,  # Global search
-        'i': re.I,  # Case-insensitive search
-        'm': re.M,  # Multi-line search
-        's': re.S,  # Allows . to match newline characters
-        'u': re.U,  # Treat a pattern as a sequence of unicode code points
-        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
-    }
-
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -217,9 +204,48 @@ class JSInterpreter(object):
                 msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
+    class JS_RegExp(object):
+        _RE_FLAGS = {
+            # special knowledge: Python's re flags are bitmask values, current max 128
+            # invent new bitmask values well above that for literal parsing
+            # TODO: new pattern class to execute matches with these flags
+            'd': 1024,  # Generate indices for substring matches
+            'g': 2048,  # Global search
+            'i': re.I,  # Case-insensitive search
+            'm': re.M,  # Multi-line search
+            's': re.S,  # Allows . to match newline characters
+            'u': re.U,  # Treat a pattern as a sequence of unicode code points
+            'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
+        }
+
+        def __init__(self, pattern_txt, flags=''):
+            if isinstance(flags, compat_str):
+                flags, _ = self.regex_flags(flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
+            # First, avoid https://github.com/python/cpython/issues/74534
+            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            for name in dir(self.__self):
+                # Only these? Obviously __class__, __init__.
+                # PyPy creates a __weakref__ attribute with value None
+                # that can't be setattr'd but also can't need to be copied.
+                if name in ('__class__', '__init__', '__weakref__'):
+                    continue
+                setattr(self, name, getattr(self.__self, name))
+
+        @classmethod
+        def regex_flags(cls, expr):
+            flags = 0
+            if not expr:
+                return flags, expr
+            for idx, ch in enumerate(expr):
+                if ch not in cls._RE_FLAGS:
+                    break
+                flags |= cls._RE_FLAGS[ch]
+            return flags, expr[idx + 1:]
+
     @classmethod
     def __op_chars(cls):
-        op_chars = set(';,')
+        op_chars = set(';,[')
         for op in cls._all_operators():
             for c in op[0]:
                 op_chars.add(c)
@@ -231,17 +257,6 @@ class JSInterpreter(object):
         namespace[name] = obj
         return name
 
-    @classmethod
-    def _regex_flags(cls, expr):
-        flags = 0
-        if not expr:
-            return flags, expr
-        for idx, ch in enumerate(expr):
-            if ch not in cls._RE_FLAGS:
-                break
-            flags |= cls._RE_FLAGS[ch]
-        return flags, expr[idx + 1:]
-
     @classmethod
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
@@ -328,7 +343,7 @@ class JSInterpreter(object):
         try:
             return opfunc(left_val, right_val)
         except Exception as e:
-            raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
+            raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
 
     def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
@@ -338,7 +353,7 @@ class JSInterpreter(object):
         except Exception as e:
             if allow_undefined:
                 return JS_Undefined
-            raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
         try:
@@ -352,6 +367,7 @@ class JSInterpreter(object):
         allow_recursion -= 1
 
         should_return = False
+        # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
         expr = stmt = sub_statements.pop().strip()
         for sub_stmt in sub_statements:
@@ -371,25 +387,30 @@ class JSInterpreter(object):
         if expr[0] in _QUOTES:
             inner, outer = self._separate(expr, expr[0], 1)
             if expr[0] == '/':
-                flags, outer = self._regex_flags(outer)
-                inner = re.compile(inner[1:], flags=flags)  # , strict=True))
+                flags, outer = self.JS_RegExp.regex_flags(outer)
+                inner = self.JS_RegExp(inner[1:], flags=flags)
             else:
                 inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
             if not outer:
                 return inner, should_return
             expr = self._named_object(local_vars, inner) + outer
 
-        if expr.startswith('new '):
-            obj = expr[4:]
-            if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:])
-                expr = unified_timestamp(
-                    self.interpret_expression(left, local_vars, allow_recursion), False)
+        new_kw, _, obj = expr.partition('new ')
+        if not new_kw:
+            for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+                                  ('RegExp', self.JS_RegExp),
+                                  ('Error', self.Exception)):
+                if not obj.startswith(klass + '('):
+                    continue
+                left, right = self._separate_at_paren(obj[len(klass):])
+                argvals = self.interpret_iter(left, local_vars, allow_recursion)
+                expr = konstr(*argvals)
                 if not expr:
-                    raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
-                expr = self._dump(int(expr * 1000), local_vars) + right
+                    raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
+                expr = self._dump(expr, local_vars) + right
+                break
             else:
-                raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
+                raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
 
         if expr.startswith('void '):
             left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
@@ -430,24 +451,45 @@ class JSInterpreter(object):
                 (?P<try>try)\s*\{|
                 (?P<if>if)\s*\(|
                 (?P<switch>switch)\s*\(|
-                (?P<for>for)\s*\(
+                (?P<for>for)\s*\(|
+                (?P<while>while)\s*\(
                 ''', expr)
         md = m.groupdict() if m else {}
         if md.get('if'):
             cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
-            if_expr, expr = self._separate_at_paren(expr.lstrip())
-            # TODO: "else if" is not handled
+            if expr.startswith('{'):
+                if_expr, expr = self._separate_at_paren(expr)
+            else:
+                # may lose ... else ... because of ll.368-374
+                if_expr, expr = self._separate_at_paren(expr, delim=';')
             else_expr = None
-            m = re.match(r'else\s*{', expr)
+            m = re.match(r'else\s*(?P<block>\{)?', expr)
             if m:
-                else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                if m.group('block'):
+                    else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                else:
+                    # handle subset ... else if (...) {...} else ...
+                    # TODO: make interpret_statement do this properly, if possible
+                    exprs = list(self._separate(expr[m.end():], delim='}', max_split=2))
+                    if len(exprs) > 1:
+                        if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]):
+                            else_expr = exprs[0] + '}' + exprs[1]
+                            expr = (exprs[2] + '}') if len(exprs) == 3 else None
+                        else:
+                            else_expr = exprs[0]
+                            exprs.append('')
+                            expr = '}'.join(exprs[1:])
+                    else:
+                        else_expr = exprs[0]
+                        expr = None
+                    else_expr = else_expr.lstrip() + '}'
             cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
             ret, should_abort = self.interpret_statement(
                 if_expr if cndn else else_expr, local_vars, allow_recursion)
             if should_abort:
                 return ret, True
 
-        if md.get('try'):
+        elif md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None
             try:
@@ -484,8 +526,8 @@ class JSInterpreter(object):
             if err:
                 raise err
 
-        elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
+        elif md.get('for') or md.get('while'):
+            init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
@@ -496,11 +538,12 @@ class JSInterpreter(object):
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
                     body, expr = remaining, ''
-            start, cndn, increment = self._separate(constructor, ';')
-            self.interpret_expression(start, local_vars, allow_recursion)
-            while True:
-                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
-                    break
+            if md.get('for'):
+                start, cndn, increment = self._separate(init_or_cond, ';')
+                self.interpret_expression(start, local_vars, allow_recursion)
+            else:
+                cndn, increment = init_or_cond, None
+            while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                 try:
                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                     if should_abort:
@@ -509,7 +552,8 @@ class JSInterpreter(object):
                     break
                 except JS_Continue:
                     pass
-                self.interpret_expression(increment, local_vars, allow_recursion)
+                if increment:
+                    self.interpret_expression(increment, local_vars, allow_recursion)
 
         elif md.get('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
@@ -764,6 +808,10 @@ class JSInterpreter(object):
                     if idx >= len(obj):
                         return None
                     return ord(obj[idx])
+                elif member == 'replace':
+                    assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                    assertion(len(argvals) == 2, 'takes exactly two arguments')
+                    return re.sub(argvals[0], argvals[1], obj)
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -795,6 +843,10 @@ class JSInterpreter(object):
             raise self.Exception('Cannot return from an expression', expr)
         return ret
 
+    def interpret_iter(self, list_txt, local_vars, allow_recursion):
+        for v in self._separate(list_txt):
+            yield self.interpret_expression(v, local_vars, allow_recursion)
+
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}

From 37cbdfa0e7c9d00d450af32dc9cdaf93cbfc4576 Mon Sep 17 00:00:00 2001
From: Brian Marks <bm1549@users.noreply.github.com>
Date: Thu, 2 Feb 2023 11:58:21 -0500
Subject: [PATCH 003/156] [americastestkitchen] Add support for downloading
 entire series (#31493)

Also
* support new sites and URL patterns
* back-port from yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/americastestkitchen.py | 115 +++++++++++++++-----
 1 file changed, 88 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
index be960c0f9..08d3604e9 100644
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5b400b9ee338f922cb06450c',
             'title': 'Japanese Suppers',
             'ext': 'mp4',
+            'display_id': 'weeknight-japanese-suppers',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1523318400,
-            'upload_date': '20180410',
-            'release_date': '20180410',
+            'timestamp': 1523304000,
+            'upload_date': '20180409',
+            'release_date': '20180409',
             'series': "America's Test Kitchen",
+            'season': 'Season 18',
             'season_number': 18,
             'episode': 'Japanese Suppers',
             'episode_number': 15,
+            'duration': 1376,
+            'thumbnail': r're:^https?://',
+            'average_rating': 0,
+            'view_count': int,
         },
         'params': {
             'skip_download': True,
@@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5fbe8c61bda2010001c6763b',
             'title': 'Simple Chicken Dinner',
             'ext': 'mp4',
+            'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
             'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1610755200,
-            'upload_date': '20210116',
-            'release_date': '20210116',
+            'timestamp': 1610737200,
+            'upload_date': '20210115',
+            'release_date': '20210115',
             'series': "America's Test Kitchen",
+            'season': 'Season 21',
             'season_number': 21,
             'episode': 'Simple Chicken Dinner',
             'episode_number': 3,
+            'duration': 1397,
+            'thumbnail': r're:^https?://',
+            'view_count': int,
+            'average_rating': 0,
         },
         'params': {
             'skip_download': True,
@@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+        'only_matching': True,
     }, {
         'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
         'only_matching': True,
@@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
 
 
 class AmericasTestKitchenSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
     _TESTS = [{
         # ATK Season
         'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
         'playlist_count': 13,
     }, {
         # Cooks Country Season
-        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
         'info_dict': {
             'id': 'season_12',
             'title': 'Season 12',
         },
         'playlist_count': 13,
+    }, {
+        # America's Test Kitchen Series
+        'url': 'https://www.americastestkitchen.com/',
+        'info_dict': {
+            'id': 'americastestkitchen',
+            'title': 'America\'s Test Kitchen',
+        },
+        'playlist_count': 558,
+    }, {
+        # Cooks Country Series
+        'url': 'https://www.americastestkitchen.com/cookscountry',
+        'info_dict': {
+            'id': 'cookscountry',
+            'title': 'Cook\'s Country',
+        },
+        'playlist_count': 199,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        show_name, season_number = re.match(self._VALID_URL, url).groups()
-        season_number = int(season_number)
+        match = re.match(self._VALID_URL, url).groupdict()
+        show = match.get('show2')
+        show_path = ('/' + show) if show else ''
+        show = show or match['show']
+        season_number = int_or_none(match.get('season'))
 
-        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+        slug, title = {
+            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+            'cookscountry': ('cco', 'Cook\'s Country'),
+            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+        }[show]
 
-        season = 'Season %d' % season_number
+        facet_filters = [
+            'search_document_klass:episode',
+            'search_show_slug:' + slug,
+        ]
+
+        if season_number:
+            playlist_id = 'season_%d' % season_number
+            playlist_title = 'Season %d' % season_number
+            facet_filters.append('search_season_list:' + playlist_title)
+        else:
+            playlist_id = show
+            playlist_title = title
 
         season_search = self._download_json(
             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
-            season, headers={
-                'Origin': 'https://www.%s.com' % show_name,
+            playlist_id, headers={
+                'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
-                'facetFilters': json.dumps([
-                    'search_season_list:' + season,
-                    'search_document_klass:episode',
-                    'search_show_slug:' + slug,
-                ]),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'facetFilters': json.dumps(facet_filters),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                 'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
 
         def entries():
             for episode in (season_search.get('hits') or []):
-                search_url = episode.get('search_url')
+                search_url = episode.get('search_url')  # always formatted like '/episode/123-title-of-episode'
                 if not search_url:
                     continue
                 yield {
                     '_type': 'url',
-                    'url': 'https://www.%s.com%s' % (show_name, search_url),
-                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
                     'title': episode.get('title'),
                     'description': episode.get('description'),
                     'timestamp': unified_timestamp(episode.get('search_document_date')),
@@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
                 }
 
         return self.playlist_result(
-            entries(), 'season_%d' % season_number, season)
+            entries(), playlist_id, playlist_title)

From 297fbff23b347612a5f6002b40adba9dfad85413 Mon Sep 17 00:00:00 2001
From: Rodrigo Dias <roycocup@users.noreply.github.com>
Date: Thu, 2 Feb 2023 17:10:09 +0000
Subject: [PATCH 004/156] [doc] Fixed typo appearing to promise an example
 (#31489)

Resolves #31425

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cd888c731..6e07ddb1c 100644
--- a/README.md
+++ b/README.md
@@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
 
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
 #### Output template and Windows batch files
 

From 807e593a32a1ace8fa0be8129fc5071d86516c99 Mon Sep 17 00:00:00 2001
From: JChris246 <43832407+JChris246@users.noreply.github.com>
Date: Thu, 2 Feb 2023 13:12:36 -0400
Subject: [PATCH 005/156] [cammodels] fix and improve extractor (#31453)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/cammodels.py | 34 +++++++++----------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py
index 1eb81b75e..d2e860b24 100644
--- a/youtube_dl/extractor/cammodels.py
+++ b/youtube_dl/extractor/cammodels.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
     url_or_none,
 )
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            url, user_id, headers=self.geo_verification_headers())
-
-        manifest_root = self._html_search_regex(
-            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
-        if not manifest_root:
-            ERRORS = (
-                ("I'm offline, but let's stay connected", 'This user is currently offline'),
-                ('in a private show', 'This user is in a private show'),
-                ('is currently performing LIVE', 'This model is currently performing live'),
-            )
-            for pattern, message in ERRORS:
-                if pattern in webpage:
-                    error = message
-                    expected = True
-                    break
-            else:
-                error = 'Unable to find manifest URL root'
-                expected = False
-            raise ExtractorError(error, expected=expected)
-
         manifest = self._download_json(
-            '%s%s.json' % (manifest_root, user_id), user_id)
+            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
 
         formats = []
+        thumbnails = []
         for format_id, format_dict in manifest['formats'].items():
             if not isinstance(format_dict, dict):
                 continue
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
                         'preference': -1,
                     })
                 else:
+                    if format_id == 'jpeg':
+                        thumbnails.append({
+                            'url': f['url'],
+                            'width': f['width'],
+                            'height': f['height'],
+                            'format_id': f['format_id'],
+                        })
                     continue
                 formats.append(f)
         self._sort_formats(formats)
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
         return {
             'id': user_id,
             'title': self._live_title(user_id),
+            'thumbnails': thumbnails,
             'is_live': True,
             'formats': formats,
             'age_limit': 18

From e9611a2a3603ee201d0c1ba99e8bfd8ec1e697cd Mon Sep 17 00:00:00 2001
From: Leon Etienne <40911701+Leonetienne@users.noreply.github.com>
Date: Thu, 2 Feb 2023 18:13:39 +0100
Subject: [PATCH 006/156] [pr0gramm] implement InfoExtractor, Resolves #31433
 (#31434)

* [pr0gramm] implement infoextractor

* [pr0gramm] remove misplaced comment, uncapture regex-group

* [pr0gramm]: specify utf-8 coding

* [pr0gramm]: add trailing comma to lists for maintainability

* [pr0gramm]: ie only sets upload_date attribute

* [pr0gramm]: add video_id to title

* [pr0gramm]: more forgiving _valid_url regex

* [pr0gramm]: add uploader to title, if set

* Discriminate URL pattern

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |   4 ++
 youtube_dl/extractor/pr0gramm.py   | 105 +++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 youtube_dl/extractor/pr0gramm.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 947cbe8fd..cf0388ed2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1667,3 +1667,7 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)
diff --git a/youtube_dl/extractor/pr0gramm.py b/youtube_dl/extractor/pr0gramm.py
new file mode 100644
index 000000000..b68224fd5
--- /dev/null
+++ b/youtube_dl/extractor/pr0gramm.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+import re
+from ..utils import (
+    merge_dicts,
+)
+
+
+class Pr0grammStaticIE(InfoExtractor):
+    # Possible urls:
+    # https://pr0gramm.com/static/5466437
+    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://pr0gramm.com/static/5466437',
+        'md5': '52fa540d70d3edc286846f8ca85938aa',
+        'info_dict': {
+            'id': '5466437',
+            'ext': 'mp4',
+            'title': 'pr0gramm-5466437 by g11st',
+            'uploader': 'g11st',
+            'upload_date': '20221221',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Fetch media sources
+        entries = self._parse_html5_media_entries(url, webpage, video_id)
+        media_info = entries[0]
+
+        # this raises if there are no formats
+        self._sort_formats(media_info.get('formats') or [])
+
+        # Fetch author
+        uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
+
+        # Fetch approx upload timestamp from filename
+        # Have None-defaults in case the extraction fails
+        uploadDay = None
+        uploadMon = None
+        uploadYear = None
+        uploadTimestr = None
+        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
+        m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
+
+        if (m):
+            # Up to a day of accuracy should suffice...
+            uploadDay = m.groupdict().get('day')
+            uploadMon = m.groupdict().get('mon')
+            uploadYear = m.groupdict().get('year')
+            uploadTimestr = uploadYear + uploadMon + uploadDay
+
+        return merge_dicts({
+            'id': video_id,
+            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
+            'uploader': uploader,
+            'upload_date': uploadTimestr
+        }, media_info)
+
+
+# This extractor is for the primary URL (used for sharing, and shown in the
+# location bar). Since this page loads the DOM via JS, yt-dl can't find any
+# video information here. So let's redirect to a compatibility version of
+# the site, which does contain the <video> element by itself, without
+# requiring JS to be run.
+class Pr0grammIE(InfoExtractor):
+    # Possible urls:
+    # https://pr0gramm.com/new/546637
+    # https://pr0gramm.com/new/video/546637
+    # https://pr0gramm.com/top/546637
+    # https://pr0gramm.com/top/video/546637
+    # https://pr0gramm.com/user/g11st/uploads/5466437
+    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
+    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
+    # https://pr0gramm.com/user/froschler/1elf/5232030
+    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
+    # https://pr0gramm.com/top/fruher war alles damals/5498175
+
+    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
+    _TEST = {
+        'url': 'https://pr0gramm.com/new/video/5466437',
+        'info_dict': {
+            'id': '5466437',
+            'ext': 'mp4',
+            'title': 'pr0gramm-5466437 by g11st',
+            'uploader': 'g11st',
+            'upload_date': '20221221',
+        }
+    }
+
+    def _generic_title():
+        return "oof"
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        return self.url_result(
+            'https://pr0gramm.com/static/' + video_id,
+            video_id=video_id,
+            ie=Pr0grammStaticIE.ie_key())

From 98b0cf1cd05c493eae0f37aaa599d25d2848c0b0 Mon Sep 17 00:00:00 2001
From: Ruowang Sun <91006887+JohnnySunUmich@users.noreply.github.com>
Date: Thu, 2 Feb 2023 12:21:05 -0500
Subject: [PATCH 007/156] [Callin] Add new extractor (#31414)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/callin.py     | 74 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 75 insertions(+)
 create mode 100644 youtube_dl/extractor/callin.py

diff --git a/youtube_dl/extractor/callin.py b/youtube_dl/extractor/callin.py
new file mode 100644
index 000000000..341be479f
--- /dev/null
+++ b/youtube_dl/extractor/callin.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    try_get,
+)
+
+
+class CallinIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
+    _TESTS = [{
+        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+        'md5': '14ede27ee2c957b7e4db93140fc0745c',
+        'info_dict': {
+            'id': 'PrumRdSQJW',
+            'ext': 'mp4',
+            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+            'description': 'Or, why the government doesn’t like SpaceX',
+            'channel': 'The Pull Request',
+            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
+        }
+    }, {
+        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
+        'md5': '16f704ddbf82a27e3930533b12062f07',
+        'info_dict': {
+            'id': 'lzxMidUnjA',
+            'ext': 'mp4',
+            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+            'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
+            'channel': 'The DEBRIEF With Briahna Joy Gray',
+            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
+        }
+    }]
+
+    def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', fatal=fatal, **kw),
+            video_id, transform_source=transform_source, fatal=fatal)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        next_data = self._search_nextjs_data(webpage, video_id)
+        episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
+        if not episode:
+            raise ExtractorError('Failed to find episode data')
+
+        title = episode.get('title') or self._og_search_title(webpage)
+        description = episode.get('description') or self._og_search_description(webpage)
+
+        formats = []
+        formats.extend(self._extract_m3u8_formats(
+            episode.get('m3u8'), video_id, 'mp4',
+            entry_protocol='m3u8_native', fatal=False))
+        self._sort_formats(formats)
+
+        channel = try_get(episode, lambda x: x['show']['title'], compat_str)
+        channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'channel': channel,
+            'channel_url': channel_url,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cf0388ed2..f7bb4042f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -158,6 +158,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE

From 6d829d811932b24be4d3cc8b6c1e0d46c2b1566c Mon Sep 17 00:00:00 2001
From: zhangeric-15 <71106422+zhangeric-15@users.noreply.github.com>
Date: Thu, 2 Feb 2023 12:26:31 -0500
Subject: [PATCH 008/156] [YouTube] Fix not finding videos listed under a
 channel's "shorts" subpage.  (#31409)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves #31336

Co-authored-by: Jouni Järvinen <rautamiekka@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/youtube.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3d12e2e4a..28fdb086a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -315,7 +315,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         title = try_get(
             renderer,
             (lambda x: x['title']['runs'][0]['text'],
-             lambda x: x['title']['simpleText']), compat_str)
+             lambda x: x['title']['simpleText'],
+             lambda x: x['headline']['simpleText']), compat_str)
         description = try_get(
             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
             compat_str)
@@ -2207,6 +2208,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
+        # Shorts
+        'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'description': 'Short clips from Super Cooper Sundays!',
+            'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
+            'title': 'Super Cooper Shorts - Shorts',
+        }
+    }, {
+        # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
+        'url': 'https://www.youtube.com/@emergencyawesome/shorts',
+        'info_dict': {
+            'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
+            'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
+            'title': 'Emergency Awesome - Home',
+        },
+        'playlist_mincount': 5,
+    }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
@@ -2680,7 +2699,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+            video_renderer = try_get(
+                content,
+                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
+                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
+                dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:

From be3392a0d491af81f353b4372d47d589fda54b0c Mon Sep 17 00:00:00 2001
From: Epsilonator <28658223+clueless-skywatcher@users.noreply.github.com>
Date: Thu, 2 Feb 2023 23:03:09 +0530
Subject: [PATCH 009/156] [Blerp] Add new extractor (#31398)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/blerp.py      | 173 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 2 files changed, 174 insertions(+)
 create mode 100644 youtube_dl/extractor/blerp.py

diff --git a/youtube_dl/extractor/blerp.py b/youtube_dl/extractor/blerp.py
new file mode 100644
index 000000000..355daef6e
--- /dev/null
+++ b/youtube_dl/extractor/blerp.py
@@ -0,0 +1,173 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from ..utils import (
+    strip_or_none,
+    traverse_obj,
+)
+from .common import InfoExtractor
+
+
+class BlerpIE(InfoExtractor):
+    IE_NAME = 'blerp'
+    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
+        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
+        'info_dict': {
+            'id': '6320fe8745636cb4dd677a5a',
+            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
+            'uploader': 'luminousaj',
+            'uploader_id': '5fb81e51aa66ae000c395478',
+            'ext': 'mp3',
+            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
+        }
+    }, {
+        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
+        'info_dict': {
+            'id': '5bc94ef4796001000498429f',
+            'title': 'Yee',
+            'uploader': '179617322678353920',
+            'uploader_id': '5ba99cf71386730004552c42',
+            'ext': 'mp3',
+            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
+        }
+    }]
+
+    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
+    _GRAPHQL_QUERY = (
+        '''query webBitePageGetBite($_id: MongoID!) {
+            web {
+                biteById(_id: $_id) {
+                    ...bitePageFrag
+                    __typename
+                }
+                __typename
+            }
+        }
+
+        fragment bitePageFrag on Bite {
+            _id
+            title
+            userKeywords
+            keywords
+            color
+            visibility
+            isPremium
+            owned
+            price
+            extraReview
+            isAudioExists
+            image {
+                filename
+                original {
+                    url
+                    __typename
+                }
+                __typename
+            }
+            userReactions {
+                _id
+                reactions
+                createdAt
+                __typename
+            }
+            topReactions
+            totalSaveCount
+            saved
+            blerpLibraryType
+            license
+            licenseMetaData
+            playCount
+            totalShareCount
+            totalFavoriteCount
+            totalAddedToBoardCount
+            userCategory
+            userAudioQuality
+            audioCreationState
+            transcription
+            userTranscription
+            description
+            createdAt
+            updatedAt
+            author
+            listingType
+            ownerObject {
+                _id
+                username
+                profileImage {
+                    filename
+                    original {
+                        url
+                        __typename
+                    }
+                    __typename
+                }
+                __typename
+            }
+            transcription
+            favorited
+            visibility
+            isCurated
+            sourceUrl
+            audienceRating
+            strictAudienceRating
+            ownerId
+            reportObject {
+                reportedContentStatus
+                __typename
+            }
+            giphy {
+                mp4
+                gif
+                __typename
+            }
+            audio {
+                filename
+                original {
+                    url
+                    __typename
+                }
+                mp3 {
+                    url
+                    __typename
+                }
+                __typename
+            }
+            __typename
+        }
+
+        ''')
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        data = {
+            'operationName': self._GRAPHQL_OPERATIONNAME,
+            'query': self._GRAPHQL_QUERY,
+            'variables': {
+                '_id': audio_id
+            }
+        }
+
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        json_result = self._download_json('https://api.blerp.com/graphql',
+                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
+
+        bite_json = json_result['data']['web']['biteById']
+
+        info_dict = {
+            'id': bite_json['_id'],
+            'url': bite_json['audio']['mp3']['url'],
+            'title': bite_json['title'],
+            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
+            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
+            'ext': 'mp3',
+            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
+        }
+
+        return info_dict
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f7bb4042f..b8db4c818 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -138,6 +138,7 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
 )
+from .blerp import BlerpIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE

From bc6f94e459694f541a2a1078fad59b02f2fc9d4c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 23:19:03 +0000
Subject: [PATCH 010/156] [FIFA] Back-port extractor from yt-dlp (#31385)

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/fifa.py       | 101 +++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/fifa.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b8db4c818..31a3e588e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -376,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
     FilmOnChannelIE,
diff --git a/youtube_dl/extractor/fifa.py b/youtube_dl/extractor/fifa.py
new file mode 100644
index 000000000..15157774e
--- /dev/null
+++ b/youtube_dl/extractor/fifa.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    int_or_none,
+    traverse_obj,
+    unified_timestamp,
+)
+
+if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
+
+    BaseInfoExtractor = InfoExtractor
+
+    import re
+
+    class InfoExtractor(BaseInfoExtractor):
+
+        @classmethod
+        def _match_valid_url(cls, url):
+            return re.match(cls._VALID_URL, url)
+
+
+class FifaIE(InfoExtractor):
+    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
+        'info_dict': {
+            'id': '7on10qPcnyLajDDU3ntg6y',
+            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
+            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
+            'duration': 8165,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
+        'info_dict': {
+            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
+            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
+            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Highlights'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
+            'duration': 902,
+            'release_timestamp': 1404777600,
+            'release_date': '20140708',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
+        'info_dict': {
+            'id': '3C6gQH9C2DLwzNx7BMRQdp',
+            'title': 'Josimar goal against Northern Ireland | Classic Goals',
+            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Goal'],
+            'duration': 28,
+            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id, locale = self._match_valid_url(url).group('id', 'locale')
+        webpage = self._download_webpage(url, video_id)
+
+        preconnect_link = self._search_regex(
+            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
+
+        video_details = self._download_json(
+            '{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)
+
+        preplay_parameters = self._download_json(
+            '{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']
+
+        content_data = self._download_json(
+            # 1. query string is expected to be sent as-is
+            # 2. `sig` must be appended
+            # 3. if absent, the call appears to work but the manifest is bad (404)
+            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
+            video_id, 'Downloading Content Data')
+
+        # formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
+        formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_details['title'],
+            'description': video_details.get('description'),
+            'duration': int_or_none(video_details.get('duration')),
+            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }

From f316f5d4e391ca40273bce65c67bedc16ae99172 Mon Sep 17 00:00:00 2001
From: afterdelight <39585663+afterdelight@users.noreply.github.com>
Date: Fri, 3 Feb 2023 06:20:14 +0700
Subject: [PATCH 011/156] [xhamster] add support for new domain xhvid.com
 (#31370)

---
 youtube_dl/extractor/xhamster.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index f764021ba..e17947fc6 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -24,7 +24,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -123,6 +123,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -433,6 +436,9 @@ class XHamsterUserIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/users/mobhunter',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/users/pelushe21',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

From 9d17948b5a1cc48bd526b1163292415577131c31 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 23:25:44 +0000
Subject: [PATCH 012/156] [myvideoge] Add new extractor (#31360)

NB download tests on CI servers blocked

Co-authored-by: Alfonso Solbes <fonk666@gmail.com>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/myvideoge.py  | 87 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 youtube_dl/extractor/myvideoge.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 31a3e588e..96b27b179 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -728,6 +728,7 @@ from .myvi import (
     MyviIE,
     MyviEmbedIE,
 )
+from .myvideoge import MyVideoGeIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
diff --git a/youtube_dl/extractor/myvideoge.py b/youtube_dl/extractor/myvideoge.py
new file mode 100644
index 000000000..efbfda7a6
--- /dev/null
+++ b/youtube_dl/extractor/myvideoge.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+    get_element_by_class,
+    int_or_none,
+    js_to_json,
+    MONTH_NAMES,
+    qualities,
+    unified_strdate,
+)
+
+
+class MyVideoGeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.myvideo.ge/v/3941048',
+        'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
+        'info_dict': {
+            'id': '3941048',
+            'ext': 'mp4',
+            'title': 'The best prikol',
+            'upload_date': '20200611',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'chixa33',
+            'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
+        },
+        # working from local dev system
+        'skip': 'site blocks CI servers',
+    }
+    _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
+
+    _quality = staticmethod(qualities(('SD', 'HD')))
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = (
+            self._og_search_title(webpage, default=None)
+            or clean_html(get_element_by_class('my_video_title', webpage))
+            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
+
+        jwplayer_sources = self._parse_json(
+            self._search_regex(
+                r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
+            or '',
+            video_id, transform_source=js_to_json, fatal=False)
+
+        formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
+        for f in formats or []:
+            f['preference'] = self._quality(f.get('format_id'))
+        self._sort_formats(formats)
+
+        description = (
+            self._og_search_description(webpage)
+            or get_element_by_id('long_desc_holder', webpage)
+            or self._html_search_meta('description', webpage))
+
+        uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+
+        upload_date = get_element_by_class('mv_vid_upl_date', webpage)
+        # as ka locale may not be present roll a local date conversion
+        upload_date = (unified_strdate(
+            # translate any ka month to an en one (exact match: index() lookup below)
+            re.sub('|'.join(self._MONTH_NAMES_KA),
+                   lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
+                   upload_date))
+            if upload_date else None)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader': uploader,
+            'formats': formats,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'upload_date': upload_date,
+            'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
+            'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
+            'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
+        }

From 384f632e8a9b61e864a26678d85b2b39933b9bae Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 3 Feb 2023 21:10:07 +0000
Subject: [PATCH 013/156] [ITV] Overhaul ITV extractor (#30266)

* support ITVX URLs (thanks Vangelis66)
* support legacy ITV Hub URLs
* include extraction fix 4c57dd2 from sleaux-meaux 3 May 2021
* include extraction fix 6fbcc16, fix by staubichsauger & pukkandan
* work-around duration parsing pending fix to utils.parse_duration
* apply default vanilla UA for pages and media to avoid site blocking
* also detect and report `Episode not found` instead of generic 404
* rework ITVBTCCIE with geo-block detection, best effort geo-restriction handling, news article support
* fix tests
---
 youtube_dl/extractor/itv.py | 382 ++++++++++++++++++++++++++++--------
 1 file changed, 299 insertions(+), 83 deletions(-)

diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index e86c40b42..7026139ea 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -3,123 +3,266 @@ from __future__ import unicode_literals
 
 import json
 import re
+import sys
 
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
+from ..compat import (
+    compat_HTTPError,
+    compat_integer_types,
+    compat_kwargs,
+    compat_urlparse,
+)
 from ..utils import (
     clean_html,
     determine_ext,
+    error_to_compat_str,
     extract_attributes,
-    get_element_by_class,
-    JSON_LD_RE,
+    ExtractorError,
+    get_element_by_attribute,
+    int_or_none,
     merge_dicts,
     parse_duration,
+    parse_iso8601,
+    remove_start,
     smuggle_url,
+    strip_or_none,
+    traverse_obj,
     url_or_none,
+    urljoin,
 )
 
 
-class ITVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
-    _GEO_COUNTRIES = ['GB']
+class ITVBaseIE(InfoExtractor):
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        transform_source = kw.pop('transform_source', None)
+        fatal = kw.pop('fatal', True)
+        return self._parse_json(
+            self._search_regex(
+                r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
+                webpage, 'next.js data', group='js', fatal=fatal, **kw),
+            video_id, transform_source=transform_source, fatal=fatal)
+
+    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
+        if errnote is False:
+            return False
+        if errnote is None:
+            errnote = 'Unable to download webpage'
+
+        errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+        if fatal:
+            raise ExtractorError(errmsg, sys.exc_info()[2], cause=err, video_id=video_id)
+        else:
+            self._downloader.report_warning(errmsg)
+            return False
+
+    @staticmethod
+    def _vanilla_ua_header():
+        return {'User-agent': 'Mozilla/5.0'}
+
+    def _download_webpage_handle(self, url, video_id, *args, **kwargs):
+        # specialised to (a) use vanilla UA (b) detect geo-block
+        params = self._downloader.params
+        nkwargs = {}
+        if (
+                'user_agent' not in params
+                and not any(re.match(r'(?i)user-agent\s*:', h)
+                            for h in (params.get('headers') or []))
+                and 'User-agent' not in (kwargs.get('headers') or {})):
+
+            kwargs['headers'] = dict(kwargs.get('headers') or {})  # copy: keep caller's headers
+            kwargs['headers'].update(self._vanilla_ua_header())
+            nkwargs = kwargs
+        if kwargs.get('expected_status') is not None:
+            exp = kwargs['expected_status']
+            if isinstance(exp, compat_integer_types):
+                exp = [exp]
+            if isinstance(exp, (list, tuple)) and 403 not in exp:
+                kwargs['expected_status'] = [403]
+                kwargs['expected_status'].extend(exp)
+                nkwargs = kwargs
+        else:
+            kwargs['expected_status'] = 403
+            nkwargs = kwargs
+
+        if nkwargs:
+            kwargs = compat_kwargs(kwargs)
+
+        ret = super(ITVBaseIE, self)._download_webpage_handle(url, video_id, *args, **kwargs)
+        if ret is False:
+            return ret
+        webpage, urlh = ret
+
+        if urlh.getcode() == 403:
+            # geo-block error is like this, with an unnecessary 'Of':
+            # '{\n  "Message" : "Request Originated Outside Of Allowed Geographic Region",\
+            # \n  "TransactionId" : "oas-magni-475082-xbYF0W"\n}'
+            if '"Request Originated Outside Of Allowed Geographic Region"' in webpage:
+                self.raise_geo_restricted(countries=['GB'])
+            ret = self.__handle_request_webpage_error(
+                compat_HTTPError(urlh.geturl(), 403, 'HTTP Error 403: Forbidden', urlh.headers, urlh),
+                fatal=args[2] if len(args) > 2 else kwargs.get('fatal', True))  # positional order assumed: note, errnote, fatal
+
+        return ret
+
+
+class ITVIE(ITVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
+    _IE_DESC = 'ITVX'
     _TESTS = [{
+        'note': 'Hub URLs redirect to ITVX',
         'url': 'https://www.itv.com/hub/liar/2a4547a0012',
-        'info_dict': {
-            'id': '2a4547a0012',
-            'ext': 'mp4',
-            'title': 'Liar - Series 2 - Episode 6',
-            'description': 'md5:d0f91536569dec79ea184f0a44cca089',
-            'series': 'Liar',
-            'season_number': 2,
-            'episode_number': 6,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
+        'only_matching': True,
     }, {
-        # unavailable via data-playlist-url
+        'note': 'Hub page unavailable via data-playlist-url (404 now)',
         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
         'only_matching': True,
     }, {
-        # InvalidVodcrid
+        'note': 'Hub page with InvalidVodcrid (404 now)',
         'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
         'only_matching': True,
     }, {
-        # ContentUnavailable
+        'note': 'Hub page with ContentUnavailable (404 now)',
         'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
         'only_matching': True,
-    }]
+    }, {
+        'note': 'ITVX, or itvX, show',
+        'url': 'https://www.itv.com/watch/vera/1a7314/1a7314a0014',
+        'md5': 'bd0ad666b2c058fffe7d036785880064',
+        'info_dict': {
+            'id': '1a7314a0014',
+            'ext': 'mp4',
+            'title': 'Vera - Series 3 - Episode 4 - Prodigal Son',
+            'description': 'Vera and her team investigate the fatal stabbing of an ex-Met police officer outside a busy Newcastle nightclub - but there aren\'t many clues.',
+            'timestamp': 1653591600,
+            'upload_date': '20220526',
+            'uploader': 'ITVX',
+            'thumbnail': r're:https://\w+\.itv\.com/images/(?:\w+/)+\d+x\d+\?',
+            'duration': 5340.8,
+            'age_limit': 16,
+            'series': 'Vera',
+            'series_number': 3,
+            'episode': 'Prodigal Son',
+            'episode_number': 4,
+            'channel': 'ITV3',
+            'categories': list,
+        },
+        'params': {
+            # m3u8 download
+            # 'skip_download': True,
+        },
+        'skip': 'only available in UK',
+    }, {
+        'note': 'Latest ITV news bulletin: details change daily',
+        'url': 'https://www.itv.com/watch/news/varies-but-is-not-checked/6js5d0f',
+        'info_dict': {
+            'id': '6js5d0f',
+            'ext': 'mp4',
+            'title': r're:The latest ITV News headlines - \S.+',
+            'description': r'''re:.* today's top stories from the ITV News team.$''',
+            'timestamp': int,
+            'upload_date': r're:2\d\d\d(?:0[1-9]|1[0-2])(?:[012][1-9]|3[01])',
+            'uploader': 'ITVX',
+            'thumbnail': r're:https://images\.ctfassets\.net/(?:\w+/)+[\w.]+\.(?:jpg|png)',
+            'duration': float,
+            'age_limit': None,
+        },
+        'params': {
+            # variable download
+            # 'skip_download': True,
+        },
+        'skip': 'only available in UK',
+    }
+    ]
+
+    def _og_extract(self, webpage, require_title=False):
+        return {
+            'title': self._og_search_title(webpage, fatal=require_title),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'uploader': self._og_search_property('site_name', webpage, default=None),
+        }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        params = extract_attributes(self._search_regex(
-            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
 
-        ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
-        hmac = params['data-video-hmac']
+        webpage = self._download_webpage(url, video_id)
+
+        # now quite different params!
+        params = extract_attributes(self._search_regex(
+            r'''(<[^>]+\b(?:class|data-testid)\s*=\s*("|')genie-container\2[^>]*>)''',
+            webpage, 'params'))
+
+        ios_playlist_url = traverse_obj(
+            params, 'data-video-id', 'data-video-playlist',
+            get_all=False, expected_type=url_or_none)
+
         headers = self.geo_verification_headers()
         headers.update({
             'Accept': 'application/vnd.itv.vod.playlist.v2+json',
             'Content-Type': 'application/json',
-            'hmac': hmac.upper(),
         })
         ios_playlist = self._download_json(
             ios_playlist_url, video_id, data=json.dumps({
                 'user': {
-                    'itvUserId': '',
                     'entitlements': [],
-                    'token': ''
                 },
                 'device': {
-                    'manufacturer': 'Safari',
-                    'model': '5',
+                    'manufacturer': 'Mobile Safari',
+                    'model': '5.1',
                     'os': {
-                        'name': 'Windows NT',
-                        'version': '6.1',
-                        'type': 'desktop'
+                        'name': 'iOS',
+                        'version': '5.0',
+                        'type': ' mobile'
                     }
                 },
                 'client': {
                     'version': '4.1',
-                    'id': 'browser'
+                    'id': 'browser',
+                    'supportsAdPods': True,
+                    'service': 'itv.x',
+                    'appversion': '2.43.28',
                 },
                 'variantAvailability': {
+                    'player': 'hls',
                     'featureset': {
                         'min': ['hls', 'aes', 'outband-webvtt'],
                         'max': ['hls', 'aes', 'outband-webvtt']
                     },
-                    'platformTag': 'dotcom'
+                    'platformTag': 'mobile'
                 }
             }).encode(), headers=headers)
         video_data = ios_playlist['Playlist']['Video']
-        ios_base_url = video_data.get('Base')
+        ios_base_url = traverse_obj(video_data, 'Base', expected_type=url_or_none)
+
+        media_url = (
+            (lambda u: url_or_none(urljoin(ios_base_url, u)))
+            if ios_base_url else url_or_none)
 
         formats = []
-        for media_file in (video_data.get('MediaFiles') or []):
-            href = media_file.get('Href')
+        for media_file in traverse_obj(video_data, 'MediaFiles', expected_type=list) or []:
+            href = traverse_obj(media_file, 'Href', expected_type=media_url)
             if not href:
                 continue
-            if ios_base_url:
-                href = ios_base_url + href
             ext = determine_ext(href)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    href, video_id, 'mp4', entry_protocol='m3u8_native',
+                    href, video_id, 'mp4', entry_protocol='m3u8',
                     m3u8_id='hls', fatal=False))
+
             else:
                 formats.append({
                     'url': href,
                 })
         self._sort_formats(formats)
+        for f in formats:
+            f.setdefault('http_headers', {})
+            f['http_headers'].update(self._vanilla_ua_header())
 
         subtitles = {}
-        subs = video_data.get('Subtitles') or []
-        for sub in subs:
-            if not isinstance(sub, dict):
-                continue
-            href = url_or_none(sub.get('Href'))
+        for sub in traverse_obj(video_data, 'Subtitles', expected_type=list) or []:
+            href = traverse_obj(sub, 'Href', expected_type=url_or_none)
             if not href:
                 continue
             subtitles.setdefault('en', []).append({
@@ -127,59 +270,132 @@ class ITVIE(InfoExtractor):
                 'ext': determine_ext(href, 'vtt'),
             })
 
-        info = self._search_json_ld(webpage, video_id, default={})
-        if not info:
-            json_ld = self._parse_json(self._search_regex(
-                JSON_LD_RE, webpage, 'JSON-LD', '{}',
-                group='json_ld'), video_id, fatal=False)
-            if json_ld and json_ld.get('@type') == 'BreadcrumbList':
-                for ile in (json_ld.get('itemListElement:') or []):
-                    item = ile.get('item:') or {}
-                    if item.get('@type') == 'TVEpisode':
-                        item['@context'] = 'http://schema.org'
-                        info = self._json_ld(item, video_id, fatal=False) or {}
-                        break
+        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
+        video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict, get_all=False) or {})
+        title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
+        info = self._og_extract(webpage, require_title=not title)
+        tn = info.pop('thumbnail', None)
+        if tn:
+            info['thumbnails'] = [{'url': tn}]
+
+        # num. episode title
+        num_ep_title = video_data.get('numberedEpisodeTitle')
+        if not num_ep_title:
+            num_ep_title = clean_html(get_element_by_attribute('data-testid', 'episode-hero-description-strong', webpage))
+            num_ep_title = num_ep_title and num_ep_title.rstrip(' -')
+        ep_title = strip_or_none(
+            video_data.get('episodeTitle')
+            or (num_ep_title.split('.', 1)[-1] if num_ep_title else None))
+        title = title or re.sub(r'\s+-\s+ITVX$', '', info['title'])
+        if ep_title and ep_title != title:
+            title = title + ' - ' + ep_title
+
+        def get_thumbnails():
+            tns = []
+            for w, x in (traverse_obj(video_data, ('imagePresets'), expected_type=dict) or {}).items():
+                if isinstance(x, dict):
+                    for y, z in x.items():
+                        tns.append({'id': w + '_' + y, 'url': z})
+            return tns or None
+
+        video_str = lambda *x: traverse_obj(
+            video_data, *x, get_all=False, expected_type=strip_or_none)
 
         return merge_dicts({
             'id': video_id,
-            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+            'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'duration': parse_duration(video_data.get('Duration')),
-            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
+            # parsing hh:mm:ss:nnn not yet patched
+            'duration': parse_duration(re.sub(r'(\d{2})(:)(\d{3}$)', r'\1.\3', video_data.get('Duration') or '')),
+            'description': video_str('synopsis'),
+            'timestamp': traverse_obj(video_data, 'broadcastDateTime', 'dateTime', expected_type=parse_iso8601),
+            'thumbnails': get_thumbnails(),
+            'series': video_str('showTitle', 'programmeTitle'),
+            'series_number': int_or_none(video_data.get('seriesNumber')),
+            'episode': ep_title,
+            'episode_number': int_or_none((num_ep_title or '').split('.')[0]),
+            'channel': video_str('channel'),
+            'categories': traverse_obj(video_data, ('categories', 'formatted'), expected_type=list),
+            'age_limit': {False: 16, True: 0}.get(video_data.get('isChildrenCategory')),
         }, info)
 
 
-class ITVBTCCIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
-        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+class ITVBTCCIE(ITVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _IE_DESC = 'ITV articles: News, British Touring Car Championship'
+    _TESTS = [{
+        'note': 'British Touring Car Championship',
+        'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
         'info_dict': {
             'id': 'btcc-2018-all-the-action-from-brands-hatch',
             'title': 'BTCC 2018: All the action from Brands Hatch',
         },
         'playlist_mincount': 9,
-    }
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+    }, {
+        'note': 'redirects to /btcc/articles/...',
+        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+        'only_matching': True,
+    }, {
+        'note': 'news article',
+        'url': 'https://www.itv.com/news/wales/2020-07-23/sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
+        'info_dict': {
+            'id': 'sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
+            'title': '''Sean Fletcher on why Wales' coastline should be your 'staycation' destination | ITV News''',
+        },
+        'playlist_mincount': 1,
+    }]
+
+    # should really be a class var of the BC IE
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+    BRIGHTCOVE_ACCOUNT = '1582188683001'
+    BRIGHTCOVE_PLAYER = 'HkiHLnNRx'
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, playlist_id)
+        webpage, urlh = self._download_webpage_handle(url, playlist_id)
+        link = compat_urlparse.urlparse(urlh.geturl()).path.strip('/')
 
-        entries = [
-            self.url_result(
-                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
-                    # ITV does not like some GB IP ranges, so here are some
-                    # IP blocks it accepts
-                    'geo_ip_blocks': [
-                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
-                    ],
-                    'referrer': url,
-                }),
-                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
-            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+        next_data = self._search_nextjs_data(webpage, playlist_id, fatal=False, default='{}') or {}
+        path_prefix = compat_urlparse.urlparse(next_data.get('assetPrefix') or '').path.strip('/')
+        link = remove_start(link, path_prefix).strip('/')
+
+        content = traverse_obj(
+            next_data, ('props', 'pageProps', Ellipsis),
+            expected_type=lambda x: x if isinstance(x, dict) and x.get('link') == link else None,
+            get_all=False, default={})
+        content = traverse_obj(
+            content, ('body', 'content', Ellipsis, 'data'),
+            expected_type=lambda x: x if isinstance(x, dict) and (x.get('name') == 'Brightcove' or x.get('type') == 'Brightcove') else None)
+
+        contraband = {
+            # ITV does not like some GB IP ranges, so here are some
+            # IP blocks it accepts
+            'geo_ip_blocks': [
+                '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+            ],
+            'referrer': urlh.geturl(),
+        }
+
+        def entries():
+
+            for data in content or []:
+                video_id = data.get('id')
+                if not video_id:
+                    continue
+                account = data.get('accountId') or self.BRIGHTCOVE_ACCOUNT
+                player = data.get('playerId') or self.BRIGHTCOVE_PLAYER
+                yield self.url_result(
+                    smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id), contraband),
+                    ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+
+            # obsolete ?
+            for video_id in re.findall(r'''data-video-id=["'](\d+)''', webpage):
+                yield self.url_result(
+                    smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (self.BRIGHTCOVE_ACCOUNT, self.BRIGHTCOVE_PLAYER, video_id), contraband),
+                    ie=BrightcoveNewIE.ie_key(), video_id=video_id)
 
         title = self._og_search_title(webpage, fatal=False)
 
-        return self.playlist_result(entries, playlist_id, title)
+        return self.playlist_result(entries(), playlist_id, title)

From d947ffe8e385a541f44c6125b4cbc269de6055a4 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 00:19:48 +0000
Subject: [PATCH 014/156] [IGN] Overhaul extractor to avoid URL redirection
 loop

Consequently/also:
* centralise video data extraction
* detect 404 and 503 expected errors
* handle the test video in IGNVideo
* handle two additional page formats for the tests in IGNArticle
---
 youtube_dl/extractor/ign.py | 347 ++++++++++++++++++++++++++----------
 1 file changed, 252 insertions(+), 95 deletions(-)

diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py
index 0d9f50ed2..c7daa30e5 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -1,19 +1,29 @@
+# coding: utf-8
+
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_filter as filter,
+    compat_HTTPError,
     compat_parse_qs,
-    compat_urllib_parse_urlparse,
+    compat_urlparse,
 )
 from ..utils import (
-    HEADRequest,
     determine_ext,
+    error_to_compat_str,
+    extract_attributes,
+    ExtractorError,
     int_or_none,
+    merge_dicts,
+    orderedSet,
     parse_iso8601,
     strip_or_none,
-    try_get,
+    traverse_obj,
+    url_or_none,
+    urljoin,
 )
 
 
@@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
         return self._download_json(
             'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
 
+    def _checked_call_api(self, slug):
+        try:
+            return self._call_api(slug)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                e.cause.args = e.cause.args or [
+                    e.cause.geturl(), e.cause.getcode(), e.cause.reason]
+                raise ExtractorError(
+                    'Content not found: expired?', cause=e.cause,
+                    expected=True)
+            raise
+
+    def _extract_video_info(self, video, fatal=True):
+        video_id = video['videoId']
+
+        formats = []
+        refs = traverse_obj(video, 'refs', expected_type=dict) or {}
+
+        m3u8_url = url_or_none(refs.get('m3uUrl'))
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
+
+        f4m_url = url_or_none(refs.get('f4mUrl'))
+        if f4m_url:
+            formats.extend(self._extract_f4m_formats(
+                f4m_url, video_id, f4m_id='hds', fatal=False))
+
+        for asset in (video.get('assets') or []):
+            asset_url = url_or_none(asset.get('url'))
+            if not asset_url:
+                continue
+            formats.append({
+                'url': asset_url,
+                'tbr': int_or_none(asset.get('bitrate'), 1000),
+                'fps': int_or_none(asset.get('frame_rate')),
+                'height': int_or_none(asset.get('height')),
+                'width': int_or_none(asset.get('width')),
+            })
+
+        mezzanine_url = traverse_obj(
+            video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
+        if mezzanine_url:
+            formats.append({
+                'ext': determine_ext(mezzanine_url, 'mp4'),
+                'format_id': 'mezzanine',
+                'preference': 1,
+                'url': mezzanine_url,
+            })
+
+        if formats or fatal:
+            self._sort_formats(formats)
+        else:
+            return
+
+        thumbnails = traverse_obj(
+            video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
+        tags = traverse_obj(
+            video, ('tags', Ellipsis, 'displayName'),
+            expected_type=lambda x: x.strip() or None)
+
+        metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
+        title = traverse_obj(
+            metadata, 'longTitle', 'title', 'name',
+            expected_type=lambda x: x.strip() or None)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': strip_or_none(metadata.get('description')),
+            'timestamp': parse_iso8601(metadata.get('publishDate')),
+            'duration': int_or_none(metadata.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'tags': tags,
+        }
+
+    # yt-dlp shim
+    @classmethod
+    def _extract_from_webpage(cls, url, webpage):
+        for embed_url in orderedSet(
+                cls._extract_embed_urls(url, webpage) or [], lazy=True):
+            yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
+
 
 class IGNIE(IGNBaseIE):
     """
     Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
     Some videos of it.ign.com are also supported
     """
-
-    _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
+    _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
+    _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
+    _VALID_URL = (
+        r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
+        % '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
     IE_NAME = 'ign.com'
     _PAGE_TYPE = 'video'
 
@@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
             'timestamp': 1370440800,
             'upload_date': '20130605',
             'tags': 'count:9',
-        }
+        },
+        'params': {
+            'nocheckcertificate': True,
+        },
     }, {
         'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
         'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
@@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
             'timestamp': 1420571160,
             'upload_date': '20150106',
             'tags': 'count:4',
-        }
+        },
+        'skip': '404 Not Found',
     }, {
         'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
         'only_matching': True,
     }]
 
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        grids = re.findall(
+            r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
+            webpage)
+        return filter(None,
+                      (urljoin(url, m.group('path')) for m in re.finditer(
+                          r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
+                          % cls._VIDEO_PATH_RE, grids[0] if grids else '')))
+
     def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        display_id = m.group('id')
+        if display_id:
+            return self._extract_video(url, display_id)
+        display_id = m.group('filt') or 'all'
+        return self._extract_playlist(url, display_id)
+
+    def _extract_playlist(self, url, display_id):
+        webpage = self._download_webpage(url, display_id)
+
+        return self.playlist_result(
+            (self.url_result(u, ie=self.ie_key())
+             for u in self._extract_embed_urls(url, webpage)),
+            playlist_id=display_id)
+
+    def _extract_video(self, url, display_id):
         display_id = self._match_id(url)
-        video = self._call_api(display_id)
-        video_id = video['videoId']
-        metadata = video['metadata']
-        title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
+        video = self._checked_call_api(display_id)
 
-        formats = []
-        refs = video.get('refs') or {}
+        info = self._extract_video_info(video)
 
-        m3u8_url = refs.get('m3uUrl')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
-
-        f4m_url = refs.get('f4mUrl')
-        if f4m_url:
-            formats.extend(self._extract_f4m_formats(
-                f4m_url, video_id, f4m_id='hds', fatal=False))
-
-        for asset in (video.get('assets') or []):
-            asset_url = asset.get('url')
-            if not asset_url:
-                continue
-            formats.append({
-                'url': asset_url,
-                'tbr': int_or_none(asset.get('bitrate'), 1000),
-                'fps': int_or_none(asset.get('frame_rate')),
-                'height': int_or_none(asset.get('height')),
-                'width': int_or_none(asset.get('width')),
-            })
-
-        mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
-        if mezzanine_url:
-            formats.append({
-                'ext': determine_ext(mezzanine_url, 'mp4'),
-                'format_id': 'mezzanine',
-                'preference': 1,
-                'url': mezzanine_url,
-            })
-
-        self._sort_formats(formats)
-
-        thumbnails = []
-        for thumbnail in (video.get('thumbnails') or []):
-            thumbnail_url = thumbnail.get('url')
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'url': thumbnail_url,
-            })
-
-        tags = []
-        for tag in (video.get('tags') or []):
-            display_name = tag.get('displayName')
-            if not display_name:
-                continue
-            tags.append(display_name)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': strip_or_none(metadata.get('description')),
-            'timestamp': parse_iso8601(metadata.get('publishDate')),
-            'duration': int_or_none(metadata.get('duration')),
+        return merge_dicts({
             'display_id': display_id,
-            'thumbnails': thumbnails,
-            'formats': formats,
-            'tags': tags,
-        }
+        }, info)
 
 
-class IGNVideoIE(InfoExtractor):
+class IGNVideoIE(IGNBaseIE):
     _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
     _TESTS = [{
         'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
@@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
             'description': 'Taking out assassination targets in Hitman has never been more stylish.',
             'timestamp': 1444665600,
             'upload_date': '20151012',
-        }
+        },
+        'expected_warnings': ['HTTP Error 400: Bad Request'],
     }, {
         'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
         'only_matching': True,
@@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
-        url = self._request_webpage(req, video_id).geturl()
+        parsed_url = compat_urlparse.urlparse(url)
+        embed_url = compat_urlparse.urlunparse(
+            parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
+
+        webpage, urlh = self._download_webpage_handle(embed_url, video_id)
+        new_url = urlh.geturl()
         ign_url = compat_parse_qs(
-            compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
+            compat_urlparse.urlparse(new_url).query).get('url', [None])[-1]
         if ign_url:
             return self.url_result(ign_url, IGNIE.ie_key())
-        return self.url_result(url)
+        video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
+        if not video:
+            if new_url == url:
+                raise ExtractorError('Redirect loop: ' + url)
+            return self.url_result(new_url)
+        video = extract_attributes(video)
+        video_data = video.get('data-settings') or '{}'
+        video_data = self._parse_json(video_data, video_id)['video']
+        info = self._extract_video_info(video_data)
+
+        return merge_dicts({
+            'display_id': video_id,
+        }, info)
 
 
 class IGNArticleIE(IGNBaseIE):
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
     _PAGE_TYPE = 'article'
     _TESTS = [{
         'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
         'info_dict': {
-            'id': '524497489e4e8ff5848ece34',
+            'id': '72113',
             'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
         },
         'playlist': [
@@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
                 'info_dict': {
                     'id': '5ebbd138523268b93c9141af17bec937',
                     'ext': 'mp4',
-                    'title': 'GTA 5 Video Review',
+                    'title': 'Grand Theft Auto V Video Review',
                     'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                     'timestamp': 1379339880,
                     'upload_date': '20130916',
@@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
                 'info_dict': {
                     'id': '638672ee848ae4ff108df2a296418ee2',
                     'ext': 'mp4',
-                    'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                    'title': 'GTA 5 In Slow Motion',
                     'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                     'timestamp': 1386878820,
                     'upload_date': '20131212',
@@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
             },
         ],
         'params': {
-            'playlist_items': '2-3',
             'skip_download': True,
         },
+        'expected_warnings': ['Backend fetch failed'],
     }, {
         'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
         'info_dict': {
             'id': '53ee806780a81ec46e0790f8',
             'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
         },
-        'playlist_count': 2,
+        'playlist_count': 1,
+        'expected_warnings': ['Backend fetch failed'],
     }, {
         # videoId pattern
         'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
@@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
         'only_matching': True,
     }]
 
+    def _checked_call_api(self, slug):
+        try:
+            return self._call_api(slug)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                e.cause.args = e.cause.args or [
+                    e.cause.geturl(), e.cause.getcode(), e.cause.reason]
+                if e.cause.code == 404:
+                    raise ExtractorError(
+                        'Content not found: expired?', cause=e.cause,
+                        expected=True)
+                elif e.cause.code == 503:
+                    self.report_warning(error_to_compat_str(e.cause))
+                    return
+            raise
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        article = self._call_api(display_id)
+        article = self._checked_call_api(display_id)
 
-        def entries():
-            media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
-            if media_url:
-                yield self.url_result(media_url, IGNIE.ie_key())
-            for content in (article.get('content') or []):
-                for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
-                    yield self.url_result(video_url)
+        if article:
+            # obsolete ?
+            def entries():
+                media_url = traverse_obj(
+                    article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
+                    expected_type=url_or_none)
+                if media_url:
+                    yield self.url_result(media_url, IGNIE.ie_key())
+                for content in (article.get('content') or []):
+                    for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
+                        if url_or_none(video_url):
+                            yield self.url_result(video_url)
+
+            return self.playlist_result(
+                entries(), article.get('articleId'),
+                traverse_obj(
+                    article, ('metadata', 'headline'),
+                    expected_type=lambda x: x.strip() or None))
+
+        webpage = self._download_webpage(url, display_id)
+
+        playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
+        if playlist_id:
+
+            def entries():
+                for m in re.finditer(
+                        r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
+                        webpage):
+                    flashvars = self._search_regex(
+                        r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
+                        m.group('params'), 'flashvars', default='')
+                    flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
+                    v_url = url_or_none((flashvars.get('url') or [None])[-1])
+                    if v_url:
+                        yield self.url_result(v_url)
+        else:
+            playlist_id = self._search_regex(
+                r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
+                webpage, 'id', group='id', default=None)
+
+            nextjs_data = self._search_nextjs_data(webpage, display_id)
+
+            def entries():
+                for player in traverse_obj(
+                        nextjs_data,
+                        ('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
+                    # skip promo links (which may not always be served, eg GH CI servers)
+                    if traverse_obj(nextjs_data,
+                                    ('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
+                                    expected_type=dict):
+                        continue
+                    video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
+                    info = self._extract_video_info(video, fatal=False)
+                    if info:
+                        yield merge_dicts({
+                            'display_id': display_id,
+                        }, info)
 
         return self.playlist_result(
-            entries(), article.get('articleId'),
-            strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
+            entries(), playlist_id or display_id,
+            re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)

From cd987e6fca336cf6570b4938442c23cd0bdf7256 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 01:53:47 +0000
Subject: [PATCH 015/156] [jsinterp] Nits

---
 youtube_dl/jsinterp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 1e7b342ac..60fa2b1b9 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -201,7 +201,7 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
+                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     class JS_RegExp(object):
@@ -699,7 +699,7 @@ class JSInterpreter(object):
                 """ assert, but without risk of getting optimized out """
                 if not cndn:
                     memb = member
-                    raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
+                    raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
 
             def eval_method():
                 if (variable, member) == ('console', 'debug'):

From f2f90887ca7a452dfafa7ca221fe981a4ec56707 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 00:21:35 +0000
Subject: [PATCH 016/156] [Vimeo] Fix `Unable to extract info section` redux *
 as reported in yt-dlp/yt-dlp#6149 * also allow newline in target JSON object

---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 853b38402..14f8dd034 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
         if '//player.vimeo.com/video/' in url:
             config = self._parse_json(self._search_regex(
-                r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+                r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
             if config.get('view') == 4:
                 config = self._verify_player_video_password(
                     redirect_url, video_id, headers)

From e19ec5232216fd801ded88728df5b50bfb05c1cc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Feb 2023 03:25:14 +0000
Subject: [PATCH 017/156] [Vimeo] Support /user{video_id}/{slug} URL format

---
 youtube_dl/extractor/vimeo.py | 55 +++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 14f8dd034..7f2731d83 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
     # _VALID_URL matches Vimeo URLs
     _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:
-                                www|
-                                player
-                            )
-                            \.
-                        )?
-                        vimeo(?:pro)?\.com/
-                        (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
-                        (?:.*?/)??
-                        (?:
-                            (?:
-                                play_redirect_hls|
-                                moogaloop\.swf)\?clip_id=
-                            )?
-                        (?:videos?/)?
-                        (?P<id>[0-9]+)
-                        (?:/(?P<unlisted_hash>[\da-f]{10}))?
-                        /?(?:[?&].*)?(?:[#].*)?$
-                    '''
+                     https?://
+                         (?:
+                             (?:
+                                 www|
+                                 player
+                             )
+                             \.
+                         )?
+                         vimeo(?:pro)?\.com/
+                         (?:
+                             (?P<u>user)|
+                             (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                             (?:.*?/)??
+                             (?P<q>
+                                 (?:
+                                     play_redirect_hls|
+                                     moogaloop\.swf)\?clip_id=
+                             )?
+                             (?:videos?/)?
+                         )
+                         (?P<id>[0-9]+)
+                         (?(u)
+                             /(?!videos|likes)[^/?#]+/?|
+                             (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
+                         )
+                         (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                 '''
     IE_NAME = 'vimeo'
     _TESTS = [
         {
@@ -539,7 +545,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
-        }
+        },
+        {
+            # user playlist alias -> https://vimeo.com/258705797
+            'url': 'https://vimeo.com/user26785108/newspiritualguide',
+            'only_matching': True,
+        },
         # https://gettingthingsdone.com/workflowmap/
         # vimeo embed with check-password page protected by Referer header
     ]

From 58988c1421b88875a33015b08e4d2ada43021e09 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Jan 2022 04:28:54 +0000
Subject: [PATCH 018/156] [YouTube] Bypass age-gating for certain restricted
 videos

* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client

* Also add and fix tests

* Introduce and use new utility function `update_url()`
---
 youtube_dl/extractor/youtube.py | 202 +++++++++++++++++++++++++-------
 youtube_dl/utils.py             |  11 ++
 2 files changed, 168 insertions(+), 45 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 28fdb086a..65428528d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -42,6 +42,7 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    update_url,
     update_url_query,
     url_or_none,
     urlencode_postdata,
@@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 
-    def _call_api(self, ep, query, video_id, fatal=True):
+    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
+        real_headers = {'content-type': 'application/json'}
+        if headers:
+            real_headers.update(headers)
 
         return self._download_json(
             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
             note='Downloading API JSON', errnote='Unable to download API page',
             data=json.dumps(data).encode('utf8'), fatal=fatal,
-            headers={'content-type': 'application/json'},
+            headers=real_headers,
             query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
 
     def _extract_yt_initial_data(self, video_id, webpage):
@@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                 'upload_date': '20121002',
@@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 10,
                 'view_count': int,
                 'like_count': int,
-                'dislike_count': int,
+                'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
                 'start_time': 1,
                 'end_time': 9,
-            }
+            },
         },
         {
             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
@@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 10,
                 'view_count': int,
                 'like_count': int,
-                'dislike_count': int,
             },
             'params': {
                 'skip_download': True,
@@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
         },
-        # Normal age-gate video (No vevo, embed allowed), available via embed page
+        # Age-gated videos
         {
+            'note': 'Age-gated video (No vevo, embed allowed)',
             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
             'info_dict': {
                 'id': 'HtVdAasjOgU',
@@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
-                'uploader_id': 'WitcherGame',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
+                'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
+                'categories': ['Gaming'],
+                'tags': 'count:17',
+                'channel': 'The Witcher',
+                'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
+                'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
+                'view_count': int,
+                'like_count': int,
             },
         },
         {
-            # Age-gated video only available with authentication (unavailable
-            # via embed page workaround)
+            'note': 'Age-gated video with embed allowed in public site',
+            'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+            'info_dict': {
+                'id': 'HsUATh_Nc2U',
+                'ext': 'mp4',
+                'title': 'Godzilla 2 (Official Video)',
+                'description': 'md5:bf77e03fcae5529475e500129b05668a',
+                'duration': 177,
+                'uploader': 'FlyingKitty',
+                'upload_date': '20200408',
+                'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
+                'age_limit': 18,
+                'categories': ['Entertainment'],
+                'tags': ['Flyingkitty', 'godzilla 2'],
+                'channel': 'FlyingKitty',
+                'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
+                'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Age-gated video embedable only with clientScreen=EMBED',
+            'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+            'info_dict': {
+                'id': 'Tq92D6wQ1mg',
+                'ext': 'mp4',
+                'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+                'description': 'md5:17eccca93a786d51bc67646756894066',
+                'duration': 106,
+                'uploader': 'Projekt Melody',
+                'upload_date': '20191227',
+                'age_limit': 18,
+                'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
+                'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+                'categories': ['Entertainment'],
+                'channel': 'Projekt Melody',
+                'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Non-Age-gated non-embeddable video',
+            'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+            'info_dict': {
+                'id': 'MeJVWBSsPAY',
+                'ext': 'mp4',
+                'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+                'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+                'duration': 210,
+                'uploader': 'Herr Lurik',
+                'uploader_id': 'st3in234',
+                'upload_date': '20130730',
+                'uploader_url': 'http://www.youtube.com/user/st3in234',
+                'age_limit': 0,
+                'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
+                'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
+                'categories': ['Music'],
+                'channel': 'Herr Lurik',
+                'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
+                'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
+                'artist': 'OOMPH!',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Non-bypassable age-gated video',
+            'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
+            'only_matching': True,
+        },
+        {
+            'note': 'Age-gated video only available with authentication (not via embed workaround)',
             'url': 'XgnwCQzjau8',
             'only_matching': True,
+            'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
         },
         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
         # YouTube Red ad is not captured for creator
@@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'info_dict': {
                 'id': 'lqQg6PlCWgI',
                 'ext': 'mp4',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+                'description': r're:(?s)(?:.+\s)?HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
                 'duration': 6085,
                 'upload_date': '20150827',
                 'uploader_id': 'olympic',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
-                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
-                'uploader': 'Olympic',
-                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+                'uploader': r're:Olympics?',
+                'age_limit': 0,
+                'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
+                'categories': ['Sports'],
+                'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
+                'channel': 'Olympics',
+                'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
+                'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
+                'view_count': int,
+                'like_count': int,
             },
-            'params': {
-                'skip_download': 'requires avconv',
-            }
         },
         # Non-square pixels
         {
@@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             player_response = self._call_api(
                 'player', {'videoId': video_id}, video_id)
 
-        playability_status = player_response.get('playabilityStatus') or {}
-        if playability_status.get('reason') == 'Sign in to confirm your age':
-            video_info = self._download_webpage(
-                base_url + 'get_video_info', video_id,
-                'Refetching age-gated info webpage',
-                'unable to download video info webpage', query={
-                    'video_id': video_id,
-                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                    'html5': 1,
-                    # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
-                    'c': 'TVHTML5',
-                    'cver': '6.20180913',
-                }, fatal=False)
-            if video_info:
-                pr = self._parse_json(
-                    try_get(
-                        compat_parse_qs(video_info),
-                        lambda x: x['player_response'][0], compat_str) or '{}',
-                    video_id, fatal=False)
-                if pr and isinstance(pr, dict):
-                    player_response = pr
+        def is_agegated(playability):
+            if not isinstance(playability, dict):
+                return
+
+            if playability.get('desktopLegacyAgeGateReason'):
+                return True
+            # must be a list: a lazy Py3 filter() would be exhausted after the first pass of the nested loop below
+            reasons = list(filter(None, (playability.get(r) for r in ('status', 'reason'))))
+            AGE_GATE_REASONS = (
+                'confirm your age', 'age-restricted', 'inappropriate',  # reason
+                'age_verification_required', 'age_check_required',  # status
+            )
+            return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
+
+        def get_playability_status(response):
+            return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
+
+        playability_status = get_playability_status(player_response)
+        if (is_agegated(playability_status)
+                and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
+
+            self.report_age_confirmation()
+
+            # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
+            pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+            query = {
+                'playbackContext': {'contentPlaybackContext': pb_context},
+                'contentCheckOk': True,
+                'racyCheckOk': True,
+                'context': {
+                    'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
+                    'thirdParty': {'embedUrl': 'https://google.com'},
+                },
+                'videoId': video_id,
+            }
+            headers = {
+                'X-YouTube-Client-Name': '85',
+                'X-YouTube-Client-Version': '2.0',
+                'Origin': 'https://www.youtube.com'
+            }
+
+            video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
+            age_gate_status = get_playability_status(video_info)
+            if age_gate_status.get('status') == 'OK':
+                player_response = video_info
+                playability_status = age_gate_status
 
         trailer_video_id = try_get(
             playability_status,
@@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             for thumbnail in (try_get(
                     container,
                     lambda x: x['thumbnail']['thumbnails'], list) or []):
-                thumbnail_url = thumbnail.get('url')
+                thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
                 thumbnails.append({
                     'height': int_or_none(thumbnail.get('height')),
-                    'url': thumbnail_url,
+                    'url': update_url(thumbnail_url, query=None, fragment=None),
                     'width': int_or_none(thumbnail.get('width')),
                 })
             if thumbnails:
@@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     sbr_tooltip = try_get(
                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                     if sbr_tooltip:
+                        # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
                         like_count, dislike_count = sbr_tooltip.split(' / ')
                         info.update({
                             'like_count': str_to_int(like_count),
@@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'tags': list,
             'view_count': int,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'categories': ['News & Politics'],
             'tags': list,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'categories': ['News & Politics'],
             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _real_extract(self, url):
         item_id = self._match_id(url)
-        url = compat_urlparse.urlunparse(
-            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+        url = update_url(url, netloc='www.youtube.com')
         # Handle both video/playlist URLs
         qs = parse_qs(url)
         video_id = qs.get('v', [None])[0]
@@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
             'categories': ['Nonprofits & Activism'],
             'tags': list,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'noplaylist': True,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e3c3ccff9..d5cc6386d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4121,6 +4121,17 @@ def update_url_query(url, query):
         query=compat_urllib_parse_urlencode(qs, True)))
 
 
+def update_url(url, **kwargs):
+    """Replace URL components specified by kwargs
+       url: compat_str or parsed URL tuple
+       returns: compat_str"""
+    if not kwargs:
+        return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
+    if not isinstance(url, tuple):
+        url = compat_urlparse.urlparse(url)
+    return compat_urlparse.urlunparse(url._replace(**kwargs))
+
+
 def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers = req.headers.copy()
     req_headers.update(headers)

From 30e986b83493f68bd4c2405b5f4d801891c9bdde Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 20 Jun 2022 23:15:20 +0100
Subject: [PATCH 019/156] [YouTube] Add `signatureTimestamp` for age-gate
 bypass

---
 youtube_dl/extractor/youtube.py | 34 +++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 65428528d..6c1cfe7f2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1642,6 +1642,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             fmt['url'] = compat_urlparse.urlunparse(
                 parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
+    # from yt-dlp, with tweaks
+    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
+        """
+        Extract signatureTimestamp (sts)
+        Required to tell API what sig/player version is in use.
+        """
+        sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
+        if not sts:
+            # Attempt to extract from player
+            if player_url is None:
+                error_msg = 'Cannot extract signature timestamp without player_url.'
+                if fatal:
+                    raise ExtractorError(error_msg)
+                self._downloader.report_warning(error_msg)
+                return
+            code = self._get_player_code(video_id, player_url)
+            sts = int_or_none(self._search_regex(
+                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
+                'JS player signature timestamp', group='sts', fatal=fatal))
+        return sts
+
     def _mark_watched(self, video_id, player_response):
         playback_url = url_or_none(try_get(
             player_response,
@@ -1766,6 +1787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
 
         player_response = None
+        player_url = None
         if webpage:
             player_response = self._extract_yt_initial_variable(
                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
@@ -1799,8 +1821,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
             # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
             pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+
+            # Use signatureTimestamp if available
+            # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
+            player_url = self._extract_player_url(webpage)
+            ytcfg = self._extract_ytcfg(video_id, webpage)
+            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
+            if sts:
+                pb_context['signatureTimestamp'] = sts
+
             query = {
-                'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
+                'playbackContext': {'contentPlaybackContext': pb_context},
                 'contentCheckOk': True,
                 'racyCheckOk': True,
                 'context': {
@@ -1901,7 +1932,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         formats = []
         itags = []
         itag_qualities = {}
-        player_url = None
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
         streaming_data = player_response.get('streamingData') or {}
         streaming_formats = streaming_data.get('formats') or []

From d6b14ba3163b255d0dd8d3b9ddf25d977b8262e7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 23:18:24 +0000
Subject: [PATCH 020/156] [test] Fix TestAgeRestriction

* age restriction may cause DownloadError
* update obsolete test URLs
[skip ci]
---
 test/test_age_restriction.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 6f5513faa..db98494ab 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -11,6 +11,7 @@ from test.helper import try_rm
 
 
 from youtube_dl import YoutubeDL
+from youtube_dl.utils import DownloadError
 
 
 def _download_restricted(url, filename, age):
@@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
     ydl.add_default_info_extractors()
     json_filename = os.path.splitext(filename)[0] + '.info.json'
     try_rm(json_filename)
-    ydl.download([url])
+    try:
+        ydl.download([url])
+    except DownloadError:
+        try_rm(json_filename)
     res = os.path.exists(json_filename)
     try_rm(json_filename)
     return res
@@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
         self.assertFalse(_download_restricted(url, filename, age))
 
     def test_youtube(self):
-        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+        self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
 
     def test_youporn(self):
         self._assert_restricted(
-            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-            '505835.mp4', 2, old_age=25)
+            'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
+            '16715086.mp4', 2, old_age=25)
 
 
 if __name__ == '__main__':

From 249f2b631629471af5cfee2993e62de58c8f5990 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Feb 2023 15:43:43 +0000
Subject: [PATCH 021/156] [compat] Systematise compat_ naming

[skip ci]
---
 test/test_compat.py  |   3 +-
 youtube_dl/compat.py | 221 +++++++++++++++++++++++++++----------------
 2 files changed, 139 insertions(+), 85 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 0986cff37..4dddd9a38 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -48,7 +48,8 @@ class TestCompat(unittest.TestCase):
 
     def test_all_present(self):
         import youtube_dl.compat
-        all_names = youtube_dl.compat.__all__
+        all_names = sorted(
+            youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
         present_names = set(filter(
             lambda c: '_' in c and not c.startswith('_'),
             dir(youtube_dl.compat))) - set(['unicode_literals'])
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 28942a8c1..39551f810 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,10 @@ import subprocess
 import sys
 import xml.etree.ElementTree
 
+# naming convention
+# 'compat_' + Python3_name.replace('.', '_')
+# other aliases exist for convenience and/or legacy
+
 # deal with critical unicode/str things first
 try:
     # Python 2
@@ -28,6 +32,7 @@ try:
         unicode, basestring, unichr
     )
     from .casefold import casefold as compat_casefold
+
 except NameError:
     compat_str, compat_basestring, compat_chr = (
         str, str, chr
@@ -53,16 +58,15 @@ try:
     import urllib.parse as compat_urllib_parse
 except ImportError:  # Python 2
     import urllib as compat_urllib_parse
+    import urlparse as _urlparse
+    for a in dir(_urlparse):
+        if not hasattr(compat_urllib_parse, a):
+            setattr(compat_urllib_parse, a, getattr(_urlparse, a))
+    del _urlparse
 
-try:
-    from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError:  # Python 2
-    from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
-    import urllib.parse as compat_urlparse
-except ImportError:  # Python 2
-    import urlparse as compat_urlparse
+# unfavoured aliases
+compat_urlparse = compat_urllib_parse
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
 
 try:
     import urllib.response as compat_urllib_response
@@ -73,6 +77,7 @@ try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
     import cookielib as compat_cookiejar
+compat_http_cookiejar = compat_cookiejar
 
 if sys.version_info[0] == 2:
     class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
@@ -84,11 +89,13 @@ if sys.version_info[0] == 2:
             compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
 else:
     compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
 
 try:
     import http.cookies as compat_cookies
 except ImportError:  # Python 2
     import Cookie as compat_cookies
+compat_http_cookies = compat_cookies
 
 if sys.version_info[0] == 2:
     class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
@@ -98,6 +105,7 @@ if sys.version_info[0] == 2:
             return super(compat_cookies_SimpleCookie, self).load(rawdata)
 else:
     compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
 
 try:
     import html.entities as compat_html_entities
@@ -2351,16 +2359,19 @@ try:
     from urllib.error import HTTPError as compat_HTTPError
 except ImportError:  # Python 2
     from urllib2 import HTTPError as compat_HTTPError
+compat_urllib_HTTPError = compat_HTTPError
 
 try:
     from urllib.request import urlretrieve as compat_urlretrieve
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve
+compat_urllib_request_urlretrieve = compat_urlretrieve
 
 try:
     from html.parser import HTMLParser as compat_HTMLParser
 except ImportError:  # Python 2
     from HTMLParser import HTMLParser as compat_HTMLParser
+compat_html_parser_HTMLParser = compat_HTMLParser
 
 try:  # Python 2
     from HTMLParser import HTMLParseError as compat_HTMLParseError
@@ -2374,6 +2385,7 @@ except ImportError:  # Python <3.4
         # and uniform cross-version exception handling
         class compat_HTMLParseError(Exception):
             pass
+compat_html_parser_HTMLParseError = compat_HTMLParseError
 
 try:
     from subprocess import DEVNULL
@@ -2390,6 +2402,8 @@ try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+    from urllib.parse import urlencode as compat_urllib_parse_urlencode
+    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                 else re.compile(r'([\x00-\x7f]+)'))
@@ -2456,9 +2470,6 @@ except ImportError:  # Python 2
         string = string.replace('+', ' ')
         return compat_urllib_parse_unquote(string, encoding, errors)
 
-try:
-    from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError:  # Python 2
     # Python 2 will choke in urlencode on mixture of byte and unicode strings.
     # Possible solutions are to either port it from python 3 with all
     # the friends or manually ensure input query contains only byte strings.
@@ -2480,7 +2491,62 @@ except ImportError:  # Python 2
         def encode_list(l):
             return [encode_elem(e) for e in l]
 
-        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
+
+    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+    # Python 2's version is apparently totally broken
+    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                   encoding='utf-8', errors='replace'):
+        qs, _coerce_result = qs, compat_str
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+        r = []
+        for name_value in pairs:
+            if not name_value and not strict_parsing:
+                continue
+            nv = name_value.split('=', 1)
+            if len(nv) != 2:
+                if strict_parsing:
+                    raise ValueError('bad query field: %r' % (name_value,))
+                # Handle case of a control-name with no equal sign
+                if keep_blank_values:
+                    nv.append('')
+                else:
+                    continue
+            if len(nv[1]) or keep_blank_values:
+                name = nv[0].replace('+', ' ')
+                name = compat_urllib_parse_unquote(
+                    name, encoding=encoding, errors=errors)
+                name = _coerce_result(name)
+                value = nv[1].replace('+', ' ')
+                value = compat_urllib_parse_unquote(
+                    value, encoding=encoding, errors=errors)
+                value = _coerce_result(value)
+                r.append((name, value))
+        return r
+
+    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                        encoding='utf-8', errors='replace'):
+        parsed_result = {}
+        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                           encoding=encoding, errors=errors)
+        for name, value in pairs:
+            if name in parsed_result:
+                parsed_result[name].append(value)
+            else:
+                parsed_result[name] = [value]
+        return parsed_result
+
+    setattr(compat_urllib_parse, '_urlencode',
+            getattr(compat_urllib_parse, 'urlencode'))
+    for name, fix in (
+            ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
+            ('parse_unquote', compat_urllib_parse_unquote),
+            ('unquote_plus', compat_urllib_parse_unquote_plus),
+            ('urlencode', compat_urllib_parse_urlencode),
+            ('parse_qs', compat_parse_qs)):
+        setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs
 
 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler
@@ -2520,6 +2586,7 @@ try:
     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
+compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
 
 etree = xml.etree.ElementTree
 
@@ -2533,10 +2600,11 @@ try:
     # xml.etree.ElementTree.Element is a method in Python <=2.6 and
     # the following will crash with:
     #  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
-    isinstance(None, xml.etree.ElementTree.Element)
+    isinstance(None, etree.Element)
     from xml.etree.ElementTree import Element as compat_etree_Element
 except TypeError:  # Python <=2.6
     from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+compat_xml_etree_ElementTree_Element = compat_etree_Element
 
 if sys.version_info[0] >= 3:
     def compat_etree_fromstring(text):
@@ -2592,6 +2660,7 @@ else:
             if k == uri or v == prefix:
                 del etree._namespace_map[k]
         etree._namespace_map[uri] = prefix
+compat_xml_etree_register_namespace = compat_etree_register_namespace
 
 if sys.version_info < (2, 7):
     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
@@ -2603,53 +2672,6 @@ if sys.version_info < (2, 7):
 else:
     compat_xpath = lambda xpath: xpath
 
-try:
-    from urllib.parse import parse_qs as compat_parse_qs
-except ImportError:  # Python 2
-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
-
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, compat_str
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError('bad query field: %r' % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
-
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
-
 
 compat_os_name = os._name if os.name == 'java' else os.name
 
@@ -2774,6 +2796,8 @@ else:
     else:
         compat_expanduser = os.path.expanduser
 
+compat_os_path_expanduser = compat_expanduser
+
 
 if compat_os_name == 'nt' and sys.version_info < (3, 8):
     # os.path.realpath on Windows does not follow symbolic links
@@ -2785,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
 else:
     compat_realpath = os.path.realpath
 
+compat_os_path_realpath = compat_realpath
+
 
 if sys.version_info < (3, 0):
     def compat_print(s):
@@ -2805,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
     compat_getpass = getpass.getpass
 
+compat_getpass_getpass = compat_getpass
+
+
 try:
     compat_input = raw_input
 except NameError:  # Python 3
     compat_input = input
 
+
 # Python < 2.6.5 require kwargs to be bytes
 try:
     def _testfunc(x):
@@ -2915,15 +2945,16 @@ else:
                 lines = _lines
         return _terminal_size(columns, lines)
 
+
 try:
     itertools.count(start=0, step=1)
     compat_itertools_count = itertools.count
 except TypeError:  # Python 2.6
     def compat_itertools_count(start=0, step=1):
-        n = start
         while True:
-            yield n
-            n += step
+            yield start
+            start += step
+
 
 if sys.version_info >= (3, 0):
     from tokenize import tokenize as compat_tokenize_tokenize
@@ -3075,6 +3106,8 @@ if sys.version_info < (3, 3):
 else:
     compat_b64decode = base64.b64decode
 
+compat_base64_b64decode = compat_b64decode
+
 
 if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
     # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
@@ -3094,30 +3127,53 @@ else:
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
 
-__all__ = [
+legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
-    'compat_Struct',
     'compat_b64decode',
+    'compat_cookiejar',
+    'compat_cookiejar_Cookie',
+    'compat_cookies',
+    'compat_cookies_SimpleCookie',
+    'compat_etree_Element',
+    'compat_etree_register_namespace',
+    'compat_expanduser',
+    'compat_getpass',
+    'compat_parse_qs',
+    'compat_realpath',
+    'compat_urllib_parse_parse_qs',
+    'compat_urllib_parse_unquote',
+    'compat_urllib_parse_unquote_plus',
+    'compat_urllib_parse_unquote_to_bytes',
+    'compat_urllib_parse_urlencode',
+    'compat_urllib_parse_urlparse',
+    'compat_urlparse',
+    'compat_urlretrieve',
+    'compat_xml_parse_error',
+]
+
+
+__all__ = [
+    'compat_html_parser_HTMLParseError',
+    'compat_html_parser_HTMLParser',
+    'compat_Struct',
+    'compat_base64_b64decode',
     'compat_basestring',
     'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
     'compat_collections_chain_map',
-    'compat_cookiejar',
-    'compat_cookiejar_Cookie',
-    'compat_cookies',
-    'compat_cookies_SimpleCookie',
+    'compat_http_cookiejar',
+    'compat_http_cookiejar_Cookie',
+    'compat_http_cookies',
+    'compat_http_cookies_SimpleCookie',
     'compat_ctypes_WINFUNCTYPE',
-    'compat_etree_Element',
     'compat_etree_fromstring',
-    'compat_etree_register_namespace',
-    'compat_expanduser',
     'compat_filter',
     'compat_get_terminal_size',
     'compat_getenv',
-    'compat_getpass',
+    'compat_getpass_getpass',
     'compat_html_entities',
     'compat_html_entities_html5',
     'compat_http_client',
@@ -3131,11 +3187,11 @@ __all__ = [
     'compat_numeric_types',
     'compat_ord',
     'compat_os_name',
-    'compat_parse_qs',
+    'compat_os_path_expanduser',
+    'compat_os_path_realpath',
     'compat_print',
     'compat_re_Match',
     'compat_re_Pattern',
-    'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',
@@ -3147,17 +3203,14 @@ __all__ = [
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
-    'compat_urllib_parse_unquote',
-    'compat_urllib_parse_unquote_plus',
-    'compat_urllib_parse_unquote_to_bytes',
-    'compat_urllib_parse_urlencode',
-    'compat_urllib_parse_urlparse',
     'compat_urllib_request',
     'compat_urllib_request_DataHandler',
     'compat_urllib_response',
-    'compat_urlparse',
-    'compat_urlretrieve',
-    'compat_xml_parse_error',
+    'compat_urllib_request_urlretrieve',
+    'compat_urllib_HTTPError',
+    'compat_xml_etree_ElementTree_Element',
+    'compat_xml_etree_ElementTree_ParseError',
+    'compat_xml_etree_register_namespace',
     'compat_xpath',
     'compat_zip',
     'workaround_optparse_bug9161',

From 90c9f789d94fc2c0b4c28c57ba2e0b2f09ef95e3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Feb 2023 13:46:43 +0000
Subject: [PATCH 022/156] [utils] Add parse_qs, update_url

[skip ci]
---
 youtube_dl/utils.py | 64 ++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d5cc6386d..4edbfa27b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -42,6 +42,7 @@ from .compat import (
     compat_HTMLParser,
     compat_HTTPError,
     compat_basestring,
+    compat_casefold,
     compat_chr,
     compat_collections_abc,
     compat_cookiejar,
@@ -54,18 +55,18 @@ from .compat import (
     compat_integer_types,
     compat_kwargs,
     compat_os_name,
-    compat_parse_qs,
+    compat_re_Match,
     compat_shlex_quote,
     compat_str,
     compat_struct_pack,
     compat_struct_unpack,
     compat_urllib_error,
     compat_urllib_parse,
+    compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urllib_parse_unquote_plus,
     compat_urllib_request,
-    compat_urlparse,
     compat_xpath,
 )
 
@@ -80,12 +81,12 @@ def register_socks_protocols():
     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
-        if scheme not in compat_urlparse.uses_netloc:
-            compat_urlparse.uses_netloc.append(scheme)
+        if scheme not in compat_urllib_parse.uses_netloc:
+            compat_urllib_parse.uses_netloc.append(scheme)
 
 
-# This is not clearly defined otherwise
-compiled_regex_type = type(re.compile(''))
+# Unfavoured alias
+compiled_regex_type = compat_re_Match
 
 
 def random_user_agent():
@@ -2725,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy):
     assert issubclass(base_class, (
         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
 
-    url_components = compat_urlparse.urlparse(socks_proxy)
+    url_components = compat_urllib_parse.urlparse(socks_proxy)
     if url_components.scheme.lower() == 'socks5':
         socks_type = ProxyType.SOCKS5
     elif url_components.scheme.lower() in ('socks', 'socks4'):
@@ -3673,7 +3674,7 @@ def remove_quotes(s):
 
 
 def url_basename(url):
-    path = compat_urlparse.urlparse(url).path
+    path = compat_urllib_parse.urlparse(url).path
     return path.strip('/').split('/')[-1]
 
 
@@ -3693,7 +3694,7 @@ def urljoin(base, path):
     if not isinstance(base, compat_str) or not re.match(
             r'^(?:https?:)?//', base):
         return None
-    return compat_urlparse.urljoin(base, path)
+    return compat_urllib_parse.urljoin(base, path)
 
 
 class HEADRequest(compat_urllib_request.Request):
@@ -4091,6 +4092,10 @@ def escape_url(url):
     ).geturl()
 
 
+def parse_qs(url):
+    return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
+
+
 def read_batch_urls(batch_fd):
     def fixup(url):
         if not isinstance(url, compat_str):
@@ -4111,25 +4116,28 @@ def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
 
 
-def update_url_query(url, query):
-    if not query:
-        return url
-    parsed_url = compat_urlparse.urlparse(url)
-    qs = compat_parse_qs(parsed_url.query)
-    qs.update(query)
-    return compat_urlparse.urlunparse(parsed_url._replace(
-        query=compat_urllib_parse_urlencode(qs, True)))
-
-
 def update_url(url, **kwargs):
     """Replace URL components specified by kwargs
        url: compat_str or parsed URL tuple
-       returns: compat_str"""
+       if query_update is in kwargs, update query with
+       its value instead of replacing (overrides any `query`)
+       returns: compat_str
+    """
     if not kwargs:
-        return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
+        return compat_urllib_parse.urlunparse(url) if isinstance(url, tuple) else url
     if not isinstance(url, tuple):
-        url = compat_urlparse.urlparse(url)
-    return compat_urlparse.urlunparse(url._replace(**kwargs))
+        url = compat_urllib_parse.urlparse(url)
+    query = kwargs.pop('query_update', None)
+    if query:
+        qs = compat_parse_qs(url.query)
+        qs.update(query)
+        kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
+        kwargs = compat_kwargs(kwargs)
+    return compat_urllib_parse.urlunparse(url._replace(**kwargs))
+
+
+def update_url_query(url, query):
+    return update_url(url, query_update=query)
 
 
 def update_Request(req, url=None, data=None, headers={}, query={}):
@@ -5597,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 
         if proxy == '__noproxy__':
             return None  # No Proxy
-        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
             req.add_header('Ytdl-socks-proxy', proxy)
             # youtube-dl's http/https handlers do wrapping the socket with socks
             return None
@@ -6035,14 +6043,6 @@ def traverse_obj(obj, *paths, **kwargs):
     str = compat_str
 
     is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
-    # stand-in until compat_re_Match is added
-    compat_re_Match = type(re.match('a', 'a'))
-    # stand-in until casefold.py is added
-    try:
-        ''.casefold()
-        compat_casefold = lambda s: s.casefold()
-    except AttributeError:
-        compat_casefold = lambda s: s.lower()
     casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
 
     if isinstance(expected_type, type):

From 4e04f104994c5dac2cb74b64ba7725716ce939d7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Feb 2023 15:50:28 +0000
Subject: [PATCH 023/156] [compat] Update test_compat

[skip ci]
---
 test/test_compat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 4dddd9a38..e233b1ae1 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -50,9 +50,9 @@ class TestCompat(unittest.TestCase):
         import youtube_dl.compat
         all_names = sorted(
             youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
-        present_names = set(filter(
+        present_names = set(map(compat_str, filter(
             lambda c: '_' in c and not c.startswith('_'),
-            dir(youtube_dl.compat))) - set(['unicode_literals'])
+            dir(youtube_dl.compat)))) - set(['unicode_literals'])
         self.assertEqual(all_names, sorted(present_names))
 
     def test_compat_urllib_parse_unquote(self):

From bafb6dec72865cc494feb35ecc94481c30a81069 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Feb 2023 16:19:21 +0000
Subject: [PATCH 024/156] [YouTube] Refresh compat/utils usage * import
 parse_qs() * import parse_qs in lazy_extractors (clears old TODO) * clean up
 old compiled lazy_extractors for Py2 * use update_url()

---
 devscripts/make_lazy_extractors.py | 10 ++++-
 test/test_execution.py             | 12 +++---
 youtube_dl/extractor/youtube.py    | 61 +++++++++++-------------------
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 878ae72b1..edc19183d 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 lazy_extractors_filename = sys.argv[1]
 if os.path.exists(lazy_extractors_filename):
     os.remove(lazy_extractors_filename)
+# Py2: may be confused by leftover lazy_extractors.pyc
+try:
+    os.remove(lazy_extractors_filename + 'c')
+except OSError:
+    pass
 
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
 
 module_contents = [
     module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
-    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
+    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
+    # needed for suitable() methods of Youtube extractor (see #28780)
+    'from youtube_dl.utils import parse_qs\n',
+]
 
 ie_template = '''
 class {name}({bases}):
diff --git a/test/test_execution.py b/test/test_execution.py
index 32948d93e..704e14612 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -40,14 +40,16 @@ class TestExecution(unittest.TestCase):
         self.assertFalse(stderr)
 
     def test_lazy_extractors(self):
+        lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
         try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
             subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
         finally:
-            try:
-                os.remove('youtube_dl/extractor/lazy_extractors.py')
-            except (IOError, OSError):
-                pass
+            for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
+                try:
+                    os.remove(lazy_extractors + x)
+                except (IOError, OSError):
+                    pass
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6c1cfe7f2..6c70a98d1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,12 +14,11 @@ from ..compat import (
     compat_chr,
     compat_HTTPError,
     compat_map as map,
-    compat_parse_qs,
     compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_unquote_plus,
-    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
-    compat_urlparse,
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
@@ -33,6 +32,7 @@ from ..utils import (
     mimetype2ext,
     parse_codecs,
     parse_duration,
+    parse_qs,
     qualities,
     remove_start,
     smuggle_url,
@@ -50,10 +50,6 @@ from ..utils import (
 )
 
 
-def parse_qs(url):
-    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-
-
 class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@@ -636,6 +632,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
+                'uploader_id': 'WitcherGame',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
                 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
@@ -671,7 +669,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
-            'note': 'Age-gated video embedable only with clientScreen=EMBED',
+            'note': 'Age-gated video embeddable only with clientScreen=EMBED',
             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
             'info_dict': {
                 'id': 'Tq92D6wQ1mg',
@@ -1392,11 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     @classmethod
     def suitable(cls, url):
-        # Hack for lazy extractors until more generic solution is implemented
-        # (see #28780)
-        from .youtube import parse_qs
-        qs = parse_qs(url)
-        if qs.get('list', [None])[0]:
+        if parse_qs(url).get('list', [None])[0]:
             return False
         return super(YoutubeIE, cls).suitable(url)
 
@@ -1546,7 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if player_url.startswith('//'):
             player_url = 'https:' + player_url
         elif not re.match(r'https?://', player_url):
-            player_url = compat_urlparse.urljoin(
+            player_url = compat_urllib_parse.urljoin(
                 'https://www.youtube.com', player_url)
         return player_url
 
@@ -1628,9 +1622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _unthrottle_format_urls(self, video_id, player_url, formats):
         for fmt in formats:
-            parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
-            qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
-            n_param = qs.get('n')
+            parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
+            n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
             if not n_param:
                 continue
             n_param = n_param[-1]
@@ -1638,9 +1631,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if n_response is None:
                 # give up if descrambling failed
                 break
-            qs['n'] = [n_response]
-            fmt['url'] = compat_urlparse.urlunparse(
-                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+            fmt['url'] = update_url(
+                parsed_fmt_url, query_update={'n': [n_response]})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -1669,20 +1661,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
         if not playback_url:
             return
-        parsed_playback_url = compat_urlparse.urlparse(playback_url)
-        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
 
         # cpn generation algorithm is reverse engineered from base.js.
         # In fact it works even with dummy cpn.
         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
 
-        qs.update({
-            'ver': ['2'],
-            'cpn': [cpn],
-        })
-        playback_url = compat_urlparse.urlunparse(
-            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+        playback_url = update_url(
+            playback_url, query_update={
+                'ver': ['2'],
+                'cpn': [cpn],
+            })
 
         self._download_webpage(
             playback_url, video_id, 'Marking watched',
@@ -2075,9 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         thumbnails = []
         for container in (video_details, microformat):
-            for thumbnail in (try_get(
+            for thumbnail in try_get(
                     container,
-                    lambda x: x['thumbnail']['thumbnails'], list) or []):
+                    lambda x: x['thumbnail']['thumbnails'], list) or []:
                 thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
@@ -3287,11 +3276,7 @@ class YoutubePlaylistIE(InfoExtractor):
     def suitable(cls, url):
         if YoutubeTabIE.suitable(url):
             return False
-        # Hack for lazy extractors until more generic solution is implemented
-        # (see #28780)
-        from .youtube import parse_qs
-        qs = parse_qs(url)
-        if qs.get('v', [None])[0]:
+        if parse_qs(url).get('v', [None])[0]:
             return False
         return super(YoutubePlaylistIE, cls).suitable(url)
 
@@ -3430,9 +3415,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
     }]
 
     def _real_extract(self, url):
-        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-        query = (qs.get('search_query') or qs.get('q'))[0]
-        params = qs.get('sp', ('',))[0]
+        qs = parse_qs(url)
+        query = (qs.get('search_query') or qs.get('q'))[-1]
+        params = qs.get('sp', ('',))[-1]
         return self.playlist_result(self._search_results(query, params), query, query)
 
 

From e8198c517b70301dd5a459927b5d5976304d6482 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 8 Feb 2023 18:16:51 +0000
Subject: [PATCH 025/156] [YouTube] Fix tests

---
 youtube_dl/extractor/youtube.py | 55 ++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6c70a98d1..ba0f5c8b6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -27,6 +27,8 @@ from ..utils import (
     dict_get,
     error_to_compat_str,
     float_or_none,
+    extract_attributes,
+    get_element_by_attribute,
     int_or_none,
     js_to_json,
     mimetype2ext,
@@ -38,6 +40,7 @@ from ..utils import (
     smuggle_url,
     str_or_none,
     str_to_int,
+    traverse_obj,
     try_get,
     unescapeHTML,
     unified_strdate,
@@ -656,6 +659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
                 'duration': 177,
                 'uploader': 'FlyingKitty',
+                'uploader_id': 'FlyingKitty900',
                 'upload_date': '20200408',
                 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
                 'age_limit': 18,
@@ -678,6 +682,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'duration': 106,
                 'uploader': 'Projekt Melody',
+                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'upload_date': '20191227',
                 'age_limit': 18,
                 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
@@ -929,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'lsguqyKfVQg',
                 'ext': 'mp4',
                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
-                'alt_title': 'Dark Walk - Position Music',
+                'alt_title': 'Dark Walk',
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
                 'uploader_id': 'IronSoulElf',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
-                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
-                'track': 'Dark Walk - Position Music',
-                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
+                'track': 'Dark Walk',
+                'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
             },
             'params': {
@@ -2091,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
         is_live = video_details.get('isLive')
-        owner_profile_url = microformat.get('ownerProfileUrl')
+
+        def gen_owner_profile_url():
+            yield microformat.get('ownerProfileUrl')
+            yield extract_attributes(self._search_regex(
+                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
+                get_element_by_attribute('itemprop', 'author', webpage),
+                'owner_profile_url', default='')).get('href')
+
+        owner_profile_url = next(
+            (x for x in map(url_or_none, gen_owner_profile_url()) if x),
+            None)
 
         if not player_url:
             player_url = self._extract_player_url(webpage)
@@ -2176,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         info[d_k] = parse_duration(query[k][0])
 
         if video_description:
+            # Youtube Music Auto-generated description
             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
             if mobj:
                 release_year = mobj.group('release_year')
@@ -2250,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                             info['location'] = stl
                         else:
-                            mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+                            # •? doesn't match, but [•]? does; \xa0 = non-breaking space
+                            mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
                             if mobj:
                                 info.update({
                                     'series': mobj.group(1),
@@ -2261,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             vpir,
                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                             list) or []):
-                        tbr = tlb.get('toggleButtonRenderer') or {}
+                        tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
                         for getter, regex in [(
                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
@@ -2315,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             elif mrr_title == 'Song':
                                 info['track'] = mrr_contents_text
 
+            # this is not extraction but spelunking!
+            carousel_lockups = traverse_obj(
+                initial_data,
+                ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
+                 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
+                 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
+                expected_type=dict) or []
+            # try to reproduce logic from metadataRowContainerRenderer above (if it still is)
+            fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
+            # multiple_songs ?
+            if len(carousel_lockups) > 1:
+                fields = fields[-1:]
+            for info_row in traverse_obj(
+                    carousel_lockups,
+                    (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
+                    expected_type=dict):
+                row_title = traverse_obj(info_row, ('title', 'simpleText'))
+                row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
+                if not row_text:
+                    continue
+                for name, field in fields:
+                    if name == row_title and not info.get(field):
+                        info[field] = row_text
+
         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
             v = info.get(s_k)
             if v:

From f33923cba7670ea2e82f233c1f88210eb41f7c3b Mon Sep 17 00:00:00 2001
From: Valentin Metz <31850924+Valentin-Metz@users.noreply.github.com>
Date: Thu, 9 Feb 2023 12:25:28 +0100
Subject: [PATCH 026/156] [rbgtum] Add new extractor (#31305)

* [rbgtum] Add new extractor

* Small update, force CI

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  4 ++
 youtube_dl/extractor/rbgtum.py     | 97 ++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 youtube_dl/extractor/rbgtum.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 96b27b179..dfaef0cc3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1010,6 +1010,10 @@ from .raywenderlich import (
     RayWenderlichIE,
     RayWenderlichCourseIE,
 )
+from .rbgtum import (
+    RbgTumIE,
+    RbgTumCourseIE,
+)
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import (
diff --git a/youtube_dl/extractor/rbgtum.py b/youtube_dl/extractor/rbgtum.py
new file mode 100644
index 000000000..da48ebbc4
--- /dev/null
+++ b/youtube_dl/extractor/rbgtum.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RbgTumIE(InfoExtractor):
+    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
+    _TESTS = [{
+        # Combined view
+        'url': 'https://live.rbg.tum.de/w/cpp/22128',
+        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
+        'info_dict': {
+            'id': 'cpp/22128',
+            'ext': 'mp4',
+            'title': 'Lecture: October 18. 2022',
+            'series': 'Concepts of C++ programming (IN2377)',
+        }
+    }, {
+        # Presentation only
+        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
+        'md5': '36c584272179f3e56b0db5d880639cba',
+        'info_dict': {
+            'id': 'I2DL/12349/PRES',
+            'ext': 'mp4',
+            'title': 'Lecture 3: Introduction to Neural Networks',
+            'series': 'Introduction to Deep Learning (IN2346)',
+        }
+    }, {
+        # Camera only
+        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
+        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
+        'info_dict': {
+            'id': 'fvv-info/16130/CAM',
+            'ext': 'mp4',
+            'title': 'Fachschaftsvollversammlung',
+            'series': 'Fachschaftsvollversammlung Informatik',
+        }
+    }, ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
+        lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+        lecture_series_title = self._html_search_regex(
+            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
+
+        formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': lecture_title,
+            'series': lecture_series_title,
+            'formats': formats,
+        }
+
+
+class RbgTumCourseIE(InfoExtractor):
+    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
+    _TESTS = [{
+        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
+        'info_dict': {
+            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+            'id': '2022/S/fpv',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 13,
+    }, {
+        'url': 'https://live.rbg.tum.de/course/2022/W/set',
+        'info_dict': {
+            'title': 'SET FSMPIC',
+            'id': '2022/W/set',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 6,
+    }, ]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+        webpage = self._download_webpage(url, course_id)
+
+        lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+
+        lecture_urls = []
+        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
+            lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
+
+        return self.playlist_result(lecture_urls, course_id, lecture_series_title)

From 33db85c571304bbd6863e3407ad8d08764c9e53b Mon Sep 17 00:00:00 2001
From: teddy171 <teddy171@qq.com>
Date: Fri, 10 Feb 2023 04:19:27 +0800
Subject: [PATCH 027/156] [feat]: Add support to external downloader aria2p
 (#31500)

* feat: add class Aria2pFD

* feat: create call_downloader function

* feat: a colorful download interface to aria2pFD

* feat: change value name

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Typo in suggestion

* fix: remove unused value

* fix: apply `not` to the return value (0 is normal); call total_seconds() on download.eta (a timedelta object); handle the waiting status when hooking progress

* fix: remove unused method ..utils.format_bytes

* fix: comply with flake8

* fix: comply with flake8

* Apply suggestions from code review

* [feat] test external downloader aria2p

* [feat] test external downloader aria2p

* [fix] test_external_downloader.py

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update youtube_dl/downloader/external.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* refactoring code and fix bugs

* Apply suggestions from code review

* Rename test_external_downloader.py to test_downloader_external.py

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 test/helper.py                    |  11 +++
 test/test_downloader_external.py  | 115 ++++++++++++++++++++++++++++++
 test/test_downloader_http.py      |  17 ++---
 test/test_http.py                 |  16 ++---
 youtube_dl/downloader/external.py |  58 +++++++++++++++
 5 files changed, 193 insertions(+), 24 deletions(-)
 create mode 100644 test/test_downloader_external.py

diff --git a/test/helper.py b/test/helper.py
index c6a2f0667..883b2e877 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -89,6 +89,17 @@ class FakeYDL(YoutubeDL):
         self.report_warning = types.MethodType(report_warning, self)
 
 
+class FakeLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        pass
+
+
 def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
         for tc in ie.get_testcases(include_onlymatching):
diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
new file mode 100644
index 000000000..c0239502b
--- /dev/null
+++ b/test/test_downloader_external.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import subprocess
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+    try_rm,
+)
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_http_server
+from youtube_dl.utils import encodeFilename
+from youtube_dl.downloader.external import Aria2pFD
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def send_content_range(self, total=None):
+        range_header = self.headers.get('Range')
+        start = end = None
+        if range_header:
+            mobj = re.match(r'bytes=(\d+)-(\d+)', range_header)
+            if mobj:
+                start, end = (int(mobj.group(i)) for i in (1, 2))
+        valid_range = start is not None and end is not None
+        if valid_range:
+            content_range = 'bytes %d-%d' % (start, end)
+            if total:
+                content_range += '/%d' % total
+            self.send_header('Content-Range', content_range)
+        return (end - start + 1) if valid_range else total
+
+    def serve(self, range=True, content_length=True):
+        self.send_response(200)
+        self.send_header('Content-Type', 'video/mp4')
+        size = TEST_SIZE
+        if range:
+            size = self.send_content_range(TEST_SIZE)
+        if content_length:
+            self.send_header('Content-Length', size)
+        self.end_headers()
+        self.wfile.write(b'#' * size)
+
+    def do_GET(self):
+        if self.path == '/regular':
+            self.serve()
+        elif self.path == '/no-content-length':
+            self.serve(content_length=False)
+        elif self.path == '/no-range':
+            self.serve(range=False)
+        elif self.path == '/no-range-no-content-length':
+            self.serve(range=False, content_length=False)
+        else:
+            assert False, 'unrecognised server path'
+
+
+@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
+class TestAria2pFD(unittest.TestCase):
+    def setUp(self):
+        self.httpd = compat_http_server.HTTPServer(
+            ('127.0.0.1', 0), HTTPTestRequestHandler)
+        self.port = http_server_port(self.httpd)
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+    def download(self, params, ep):
+        with subprocess.Popen(
+            ['aria2c', '--enable-rpc'],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        ) as process:
+            if not process.poll():
+                filename = 'testfile.mp4'
+                params['logger'] = FakeLogger()
+                params['outtmpl'] = filename
+                ydl = YoutubeDL(params)
+                try_rm(encodeFilename(filename))
+                self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0)
+                self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+                try_rm(encodeFilename(filename))
+            process.kill()
+
+    def download_all(self, params):
+        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+            self.download(params, ep)
+
+    def test_regular(self):
+        self.download_all({'external_downloader': 'aria2p'})
+
+    def test_chunked(self):
+        self.download_all({
+            'external_downloader': 'aria2p',
+            'http_chunk_size': 1000,
+        })
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 750472281..4e6d7a2a0 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,7 +9,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import http_server_port, try_rm
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+    try_rm,
+)
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
@@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False
 
 
-class FakeLogger(object):
-    def debug(self, msg):
-        pass
-
-    def warning(self, msg):
-        pass
-
-    def error(self, msg):
-        pass
-
-
 class TestHttpFD(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
diff --git a/test/test_http.py b/test/test_http.py
index 3ee0a5dda..487a9bc77 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,7 +8,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import http_server_port
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+)
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
@@ -52,17 +55,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False
 
 
-class FakeLogger(object):
-    def debug(self, msg):
-        pass
-
-    def warning(self, msg):
-        pass
-
-    def error(self, msg):
-        pass
-
-
 class TestHTTP(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index a06ab2e50..bffcd10b6 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -200,6 +200,64 @@ class Aria2cFD(ExternalFD):
         return cmd
 
 
+class Aria2pFD(ExternalFD):
+    ''' Aria2pFD class
+    This class supports using aria2p as the downloader.
+    (aria2p is a command-line tool and Python library that interacts with an aria2c daemon
+    process through JSON-RPC.)
+    It makes it easier to obtain download progress.
+    To use aria2p as the downloader, install aria2c and aria2p (aria2p can be installed with pip).
+    Then run aria2c in the background with the --enable-rpc option.
+    '''
+    try:
+        import aria2p  # bound as a class attribute, hence accessed as self.aria2p below
+        __avail = True
+    except ImportError:
+        __avail = False
+
+    @classmethod
+    def available(cls):
+        return cls.__avail
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        aria2 = self.aria2p.API(  # RPC endpoint is fixed: http://localhost:6800, empty secret
+            self.aria2p.Client(
+                host='http://localhost',
+                port=6800,
+                secret=''
+            )
+        )
+
+        options = {
+            'min-split-size': '1M',
+            'max-connection-per-server': 4,
+            'auto-file-renaming': 'false',
+        }
+        options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
+        options['out'] = os.path.basename(tmpfilename)
+        options['header'] = []
+        for key, val in info_dict['http_headers'].items():
+            options['header'].append('{0}: {1}'.format(key, val))
+        download = aria2.add_uris([info_dict['url']], options)
+        status = {
+            'status': 'downloading',
+            'tmpfilename': tmpfilename,
+        }
+        started = time.time()
+        while download.status in ['active', 'waiting']:  # poll every 0.5s until it leaves these states
+            download = aria2.get_download(download.gid)
+            status.update({
+                'downloaded_bytes': download.completed_length,
+                'total_bytes': download.total_length,
+                'elapsed': time.time() - started,
+                'eta': download.eta.total_seconds(),
+                'speed': download.download_speed,
+            })
+            self._hook_progress(status)
+            time.sleep(.5)
+        return download.status != 'complete'  # False only when the final status is 'complete'
+
+
 class HttpieFD(ExternalFD):
     @classmethod
     def available(cls):

From 822f19f05d0ab1a4a945a85f691f2079f7cb3bbb Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:37:45 +0100
Subject: [PATCH 028/156] [FileMoonIE] Add extractor for filemoon.sx (#31515)

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/filemoon.py   | 43 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 youtube_dl/extractor/filemoon.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index dfaef0cc3..f63a2e030 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -376,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .filemoon import FileMoonIE
 from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
diff --git a/youtube_dl/extractor/filemoon.py b/youtube_dl/extractor/filemoon.py
new file mode 100644
index 000000000..654df9b69
--- /dev/null
+++ b/youtube_dl/extractor/filemoon.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    js_to_json,
+)
+
+
+class FileMoonIE(InfoExtractor):
+    """Extractor for filemoon.sx pages whose JW Player setup is hidden in packed JS."""
+    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
+        'md5': '5a713742f57ac4aef29b74733e8dda01',
+        'info_dict': {
+            'id': 'dw40rxrzruqz',
+            'title': 'dw40rxrzruqz',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # the last eval(...) script is unpacked; NOTE(review): IndexError if the page has none
+        packed = re.findall(r'(?s)(eval.*?)</script>', webpage)[-1]
+        unpacked = decode_packed_codes(packed)
+
+        # the unpacked code passes a JS (not strict JSON) `sources` array to player.setup()
+        sources_js = self._search_regex(
+            r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources')
+        jwplayer_sources = self._parse_json(sources_js, video_id, transform_source=js_to_json)
+        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
+
+        return {
+            'id': video_id,
+            'title': self._generic_title(url) or video_id,
+            'formats': formats
+        }

From de48105dd870e353af468bfb8d49b14d9894e649 Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:47:43 +0100
Subject: [PATCH 029/156] [KommunetvIE] Add extractor for kommunetv.no (#31516)

* Add extractor for kommunetv.no
* Using utils.update_url instead of regex

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/kommunetv.py  | 35 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 youtube_dl/extractor/kommunetv.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f63a2e030..d8428f46f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -557,6 +557,7 @@ from .khanacademy import (
 from .kickstarter import KickStarterIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
+from .kommunetv import KommunetvIE
 from .konserthusetplay import KonserthusetPlayIE
 from .krasview import KrasViewIE
 from .kth import KTHIE
diff --git a/youtube_dl/extractor/kommunetv.py b/youtube_dl/extractor/kommunetv.py
new file mode 100644
index 000000000..91d06a74f
--- /dev/null
+++ b/youtube_dl/extractor/kommunetv.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import update_url
+
+
+class KommunetvIE(InfoExtractor):
+    """Extractor for archive pages on *.kommunetv.no sites."""
+    _VALID_URL = r'https://(\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://oslo.kommunetv.no/archive/921',
+        'md5': '5f102be308ee759be1e12b63d5da4bbc',
+        'info_dict': {
+            'id': '921',
+            'title': 'Bystyremøte',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        # NOTE(review): the API host is always oslo.kommunetv.no, whatever subdomain the URL has - confirm
+        data = self._download_json(
+            'https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id,
+            video_id, headers={'Accept': 'application/json'})
+        title = data['stream']['title']
+        # drop query and fragment from the playlist URL (local renamed to avoid shadowing `file`)
+        m3u8_url = update_url(data['playlist'][0]['playlist'][0]['file'], query=None, fragment=None)
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+        return {
+            'id': video_id, 'formats': formats, 'title': title,
+        }

From 6f8c2635a573c84ef66c02f73b4aeff1cc36ae4e Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:54:45 +0100
Subject: [PATCH 030/156] [StreamsbIE] Add extractor for streamsb.com
 (viewsb.com) (#31517)

* Add extractor for streamsb.com (viewsb.com)

* make data url using app.js version

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/streamsb.py   | 61 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 youtube_dl/extractor/streamsb.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d8428f46f..3a87f9e33 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1206,6 +1206,7 @@ from .storyfire import (
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streamsb import StreamsbIE
 from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stv import STVPlayerIE
diff --git a/youtube_dl/extractor/streamsb.py b/youtube_dl/extractor/streamsb.py
new file mode 100644
index 000000000..bffcb3de1
--- /dev/null
+++ b/youtube_dl/extractor/streamsb.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import binascii
+import random
+import re
+import string
+
+from .common import InfoExtractor
+from ..utils import urljoin, url_basename
+
+
+def to_ascii_hex(str1):  # hex digits (as text) of the UTF-8 encoding of str1
+    return binascii.hexlify(str1.encode('utf-8')).decode('ascii')
+
+
+def generate_random_string(length):  # `length` random ASCII letters and digits
+    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
+
+
+class StreamsbIE(InfoExtractor):
+    """Extractor for videos served from the StreamSB domains listed in _DOMAINS."""
+    _DOMAINS = ('viewsb.com', )
+    # escape the domains so that a '.' in them only matches a literal dot
+    _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(map(re.escape, _DOMAINS))
+    _TEST = {
+        'url': 'https://viewsb.com/dxfvlu4qanjx',
+        'md5': '488d111a63415369bf90ea83adc8a325',
+        'info_dict': {
+            'id': 'dxfvlu4qanjx',
+            'ext': 'mp4',
+            'title': 'Sintel'
+        }
+    }
+
+    def _real_extract(self, url):
+        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
+        webpage = self._download_webpage(url, video_id)
+
+        # the player lives in an iframe; the basename of its .html path gives the video code
+        iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path')
+        iframe_url = urljoin('https://' + domain, iframe_rel_url)
+
+        iframe_data = self._download_webpage(iframe_url, video_id)
+        # the sources URL embeds the number from the app.min.<N>.js script name ('50' if not found)
+        app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50'
+        video_code = url_basename(iframe_url).rsplit('.')[0]
+
+        length = 12
+        req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb'))
+        ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))
+
+        video_data = self._download_webpage(ereq, video_id, headers={
+            'Referer': iframe_url,
+            'watchsb': 'sbstream',
+        })
+        player_data = self._parse_json(video_data, video_id)
+        title = player_data['stream_data']['title']
+        formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+        return {'id': video_id, 'formats': formats, 'title': title}

From 42b098dd79e91295376ca98f394876555481a3eb Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Feb 2023 02:47:09 +0000
Subject: [PATCH 031/156] [InfoExtractor] Handle unquoted values in OpenGraph
 searches

---
 test/test_InfoExtractor.py     | 2 ++
 youtube_dl/extractor/common.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index dd69a681b..4db5c93f1 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -62,6 +62,7 @@ class TestInfoExtractor(unittest.TestCase):
             <meta name="og:test1" content='foo > < bar'/>
             <meta name="og:test2" content="foo >//< bar"/>
             <meta property=og-test3 content='Ill-formatted opengraph'/>
+            <meta property=og:test4 content=unquoted-value/>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -74,6 +75,7 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
 
     def test_html_search_meta(self):
         ie = self.ie
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a0a796d7b..7244e5df6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1087,7 +1087,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
         property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
                        % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'

From dd9aa74beefc179f943051c4e19eecad87ab1124 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Feb 2023 16:33:01 +0000
Subject: [PATCH 032/156] [test] Avoid name TestIE which causes a pytest
 warning

See: https://github.com/yt-dlp/yt-dlp/commit/060ac76257a8c1f7370a8a571821c1d73377701f
---
 test/test_InfoExtractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4db5c93f1..6d25441db 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -35,13 +35,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
             assert False
 
 
-class TestIE(InfoExtractor):
+class DummyIE(InfoExtractor):
     pass
 
 
 class TestInfoExtractor(unittest.TestCase):
     def setUp(self):
-        self.ie = TestIE(FakeYDL())
+        self.ie = DummyIE(FakeYDL())
 
     def test_ie_key(self):
         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)

From 2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 17 Feb 2023 11:16:54 +0000
Subject: [PATCH 033/156] [YouTube] Avoid crash if uploader_id extraction fails

See #31530.
---
 youtube_dl/extractor/youtube.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ba0f5c8b6..66b0257df 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2122,7 +2122,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 microformat.get('uploadDate')
                 or search_meta('uploadDate')),
             'uploader': video_details['author'],
-            'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
+            'uploader_id': self._search_regex(
+                r'/(?:channel|user)/([^/?&#]+)', owner_profile_url,
+                'uploader id', fatal=False) if owner_profile_url else None,
             'uploader_url': owner_profile_url,
             'channel_id': channel_id,
             'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,

From 57802e632f5a741df6fd9b30a455c32632944489 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Feb 2023 13:47:49 +0000
Subject: [PATCH 034/156] [jsinterp] Fix dict comprehension for Py2.6

Resolves #31600
---
 youtube_dl/jsinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 60fa2b1b9..a3bc42a61 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -262,7 +262,7 @@ class JSInterpreter(object):
         if not expr:
             return
         # collections.Counter() is ~10% slower in both 2.7 and 3.9
-        counters = {k: 0 for k in _MATCHING_PARENS.values()}
+        counters = dict((k, 0) for k in _MATCHING_PARENS.values())
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
         after_op, in_regex_char_group, skip_re = True, False, 0

From 6067451e432fb65d487a8a67bb5cff52efb9ccf4 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 20 Feb 2023 01:41:46 +0000
Subject: [PATCH 035/156] [Vimeo] Fix e19ec52 for tween-age Pythons

* a check in older Pythons in the 2.7 and earlier, 3.3, 3.4 series caused "sre_constants.error: nothing to repeat"
* satisfy the check by avoiding nested qualifiers that can match empty string

Resolves #31597
---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 7f2731d83..8e1a805f6 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -286,7 +286,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                              /(?!videos|likes)[^/?#]+/?|
                              (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
                          )
-                         (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                         (?:(?(q)[&]|(?(u)|/?)[?]).+?)?(?:[#].*)?$
                  '''
     IE_NAME = 'vimeo'
     _TESTS = [

From 1d3751c3fe50b203d3e2bff71d866c8c500f8288 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 1 Jun 2021 18:05:41 +0530
Subject: [PATCH 036/156] Escape URLs in `sanitized_Request`, not
 `sanitize_url` d2558234cf5dd12d6896eed5427b7dcdb3ab7b5a added escaping of
 URLs while sanitizing. However, `sanitize_url` may not always receive an
 actual URL. Eg: When using `youtube-dl "search query" --default-search
 ytsearch`, `search query` gets escaped to `search%20query` before being
 prefixed with `ytsearch:` which is not the intended behavior. So the escaping
 is moved to `sanitized_Request` instead.

---
 test/test_utils.py              |  1 +
 youtube_dl/extractor/generic.py | 19 +++++++++++++++++++
 youtube_dl/utils.py             |  4 ++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 9d364c863..ea2b96ed2 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -250,6 +250,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
         self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
         self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('foo bar'), 'foo bar')
 
     def test_expand_path(self):
         def env(var):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 0e473e952..b01900afa 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2320,6 +2320,25 @@ class GenericIE(InfoExtractor):
                 'height': 720,
                 'age_limit': 18,
             },
+        }, {
+            # would like to use the yt-dl test video but searching for
+            # '"\'/\\ä↭𝕐' fails, so using an old vid from YouTube Korea
+            'note': 'Test default search',
+            'url': 'Shorts로 허락 필요없이 놀자! (BTS편)',
+            'info_dict': {
+                'id': 'usDGO4Zb-dc',
+                'ext': 'mp4',
+                'title': 'YouTube Shorts로 허락 필요없이 놀자! (BTS편)',
+                'description': 'md5:96e31607eba81ab441567b5e289f4716',
+                'upload_date': '20211107',
+                'uploader': 'YouTube Korea',
+                'location': '대한민국',
+            },
+            'params': {
+                'default_search': 'ytsearch',
+                'skip_download': True,
+            },
+            'expected_warnings': ['uploader id'],
         },
     ]
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4edbfa27b..761edcd49 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2176,11 +2176,11 @@ def sanitize_url(url):
     for mistake, fixup in COMMON_TYPOS:
         if re.match(mistake, url):
             return re.sub(mistake, fixup, url)
-    return escape_url(url)
+    return url
 
 
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+    return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs)
 
 
 def expand_path(s):

From e67e52a8f8fd7e76253e416da76570af8da200d0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 24 Feb 2023 02:32:40 +0000
Subject: [PATCH 037/156] [test] Support test-case with volatile ID (eg live
 show)

Signalled by regexp ID value, eg: `'id': r're:[\da-zA-Z_-]{8,}'`
---
 test/test_download.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_download.py b/test/test_download.py
index 19936969f..d50008307 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,6 +148,7 @@ def generator(test_case, tname):
                 try_rm(tc_filename)
                 try_rm(tc_filename + '.part')
                 try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
+
         try_rm_tcs_files()
         try:
             try_num = 1
@@ -213,7 +214,15 @@ def generator(test_case, tname):
                 # First, check test cases' data against extracted data alone
                 expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
                 # Now, check downloaded file consistency
+                # support test-case with volatile ID, signalled by regexp value
+                if tc.get('info_dict', {}).get('id', '').startswith('re:'):
+                    test_id = tc['info_dict']['id']
+                    tc['info_dict']['id'] = tc_res_dict['id']
+                else:
+                    test_id = None
                 tc_filename = get_tc_filename(tc)
+                if test_id:
+                    tc['info_dict']['id'] = test_id
                 if not test_case.get('params', {}).get('skip_download', False):
                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                     self.assertTrue(tc_filename in finished_hook_called)

From f7ce98a21e15cb094c772e9082796d009c61578b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 24 Feb 2023 02:48:37 +0000
Subject: [PATCH 038/156] [YouTube] Support @owner format in uploader_id etc

* implement https://github.com/ytdl-org/youtube-dl/issues/31530#issuecomment-1435734719
* update affected tests
* misc clean-ups
---
 youtube_dl/extractor/youtube.py | 319 +++++++++++++++++++-------------
 1 file changed, 194 insertions(+), 125 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 66b0257df..4246d84f9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     get_element_by_attribute,
     int_or_none,
     js_to_json,
+    merge_dicts,
     mimetype2ext,
     parse_codecs,
     parse_duration,
@@ -400,6 +401,62 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 break
             data['continuation'] = token
 
+    @staticmethod
+    def _owner_endpoints_path():
+        """Return the traverse_obj() path prefix that reaches each run of the video owner's title."""
+        return [
+            Ellipsis, lambda k, _: k.endswith('SecondaryInfoRenderer'),
+            ('owner', 'videoOwner'), 'videoOwnerRenderer', 'title', 'runs', Ellipsis,
+        ]
+
+    def _extract_channel_id(self, webpage, videodetails={}, metadata={}, renderers=[]):
+        """Return the channel ID from the given JSON data, else from the page's channelId meta."""
+        browse_id_path = self._owner_endpoints_path() + [
+            'navigationEndpoint', 'browseEndpoint', 'browseId']
+        # the JSON sources are only consulted when at least one of them has any content
+        channel_id = any((videodetails, metadata, renderers)) and (
+            traverse_obj(videodetails, 'channelId')
+            or traverse_obj(metadata, 'externalChannelId', 'externalId')
+            or traverse_obj(renderers, browse_id_path, get_all=False))
+        if channel_id:
+            return channel_id
+        return self._html_search_meta(
+            'channelId', webpage, 'channel id', default=None)
+
+    def _extract_author_var(self, webpage, var_name,
+                            videodetails={}, metadata={}, renderers=[]):
+        """Return the author's 'name' or 'url' (per var_name) from JSON data, else from page HTML."""
+        # per-source lookup keys, in the order: HTML attribute, videodetails, metadata, renderers
+        html_attr, details_path, metadata_path, renderer_path = {
+            'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
+            'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
+                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
+        }[var_name]
+        result = any((videodetails, metadata, renderers)) and (
+            traverse_obj(videodetails, details_path, get_all=False)
+            or traverse_obj(metadata, metadata_path, get_all=False)
+            or traverse_obj(
+                renderers, self._owner_endpoints_path() + renderer_path,
+                get_all=False))
+        if result:
+            return result
+        # fall back to the <link itemprop=var_name> tag inside the itemprop="author" element
+        author_html = get_element_by_attribute('itemprop', 'author', webpage) or ''
+        link_tag = self._search_regex(
+            r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
+            % re.escape(var_name),
+            author_html, 'author link', default='')
+        return traverse_obj(extract_attributes(link_tag), html_attr)
+
+    @staticmethod
+    def _yt_urljoin(url_or_path):  # resolve url_or_path against https://www.youtube.com
+        return urljoin('https://www.youtube.com', url_or_path)
+
+    def _extract_uploader_id(self, uploader_url):
+        # the ID is the path part after /channel/ or /user/, or else the whole @handle
+        id_re = r'/(?:(?:channel|user)/|(?=@))([^/?&#]+)'
+        return self._search_regex(id_re, uploader_url or '', 'uploader id', default=None)
+
 
 class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
@@ -516,8 +573,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
                 'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
@@ -557,8 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
                 'upload_date': '20121002',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
@@ -588,7 +645,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'youtube_include_dash_manifest': True,
                 'format': '141',
             },
-            'skip': 'format 141 not served anymore',
+            'skip': 'format 141 not served any more',
         },
         # DASH manifest with encrypted signature
         {
@@ -600,7 +657,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                 'duration': 244,
                 'uploader': 'AfrojackVEVO',
-                'uploader_id': 'AfrojackVEVO',
+                'uploader_id': '@AfrojackVEVO',
                 'upload_date': '20131011',
                 'abr': 129.495,
             },
@@ -618,8 +675,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 219,
                 'upload_date': '20100909',
                 'uploader': 'Amazing Atheist',
-                'uploader_id': 'TheAmazingAtheist',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+                'uploader_id': '@theamazingatheist',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
@@ -635,8 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
-                'uploader_id': 'WitcherGame',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+                'uploader_id': '@thewitcher',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@thewitcher',
                 'upload_date': '20140605',
                 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
@@ -659,7 +716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
                 'duration': 177,
                 'uploader': 'FlyingKitty',
-                'uploader_id': 'FlyingKitty900',
+                'uploader_id': '@FlyingKitty900',
                 'upload_date': '20200408',
                 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
                 'age_limit': 18,
@@ -682,7 +739,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'duration': 106,
                 'uploader': 'Projekt Melody',
-                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'uploader_id': '@ProjektMelody',
                 'upload_date': '20191227',
                 'age_limit': 18,
                 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
@@ -704,10 +761,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
                 'duration': 210,
-                'uploader': 'Herr Lurik',
-                'uploader_id': 'st3in234',
                 'upload_date': '20130730',
-                'uploader_url': 'http://www.youtube.com/user/st3in234',
+                'uploader': 'Herr Lurik',
+                'uploader_id': '@HerrLurik',
+                'uploader_url': 'http://www.youtube.com/@HerrLurik',
                 'age_limit': 0,
                 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
                 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
@@ -740,8 +797,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 266,
                 'upload_date': '20100430',
-                'uploader_id': 'deadmau5',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+                'uploader_id': '@deadmau5',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@deadmau5',
                 'creator': 'deadmau5',
                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
                 'uploader': 'deadmau5',
@@ -762,8 +819,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s)(?:.+\s)?HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
                 'duration': 6085,
                 'upload_date': '20150827',
-                'uploader_id': 'olympic',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+                'uploader_id': '@Olympics',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Olympics',
                 'uploader': r're:Olympics?',
                 'age_limit': 0,
                 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
@@ -785,8 +842,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'stretched_ratio': 16 / 9.,
                 'duration': 85,
                 'upload_date': '20110310',
-                'uploader_id': 'AllenMeow',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+                'uploader_id': '@AllenMeow',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                 'uploader': '孫ᄋᄅ',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
@@ -824,7 +881,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'dorappi2000',
                 'formats': 'mincount:31',
             },
-            'skip': 'not actual anymore',
+            'skip': 'not actual any more',
         },
         # DASH manifest with segment_list
         {
@@ -905,6 +962,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'Not multifeed any more',
         },
         {
             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -914,7 +972,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
             },
             'playlist_count': 2,
-            'skip': 'Not multifeed anymore',
+            'skip': 'Not multifeed any more',
         },
         {
             'url': 'https://vid.plus/FlRa-iH7PGw',
@@ -938,8 +996,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
-                'uploader_id': 'IronSoulElf',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+                'uploader_id': '@IronSoulElf',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@IronSoulElf',
                 'uploader': 'IronSoulElf',
                 'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk',
@@ -987,8 +1045,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                 'duration': 721,
                 'upload_date': '20150127',
-                'uploader_id': 'BerkmanCenter',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+                'uploader_id': '@BKCHarvard',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BKCHarvard',
                 'uploader': 'The Berkman Klein Center for Internet & Society',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
             },
@@ -1007,8 +1065,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 4060,
                 'upload_date': '20151119',
                 'uploader': 'Bernie Sanders',
-                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+                'uploader_id': '@BernieSanders',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BernieSanders',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
             },
             'params': {
@@ -1054,8 +1112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 2085,
                 'upload_date': '20170118',
                 'uploader': 'Vsauce',
-                'uploader_id': 'Vsauce',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+                'uploader_id': '@Vsauce',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Vsauce',
                 'series': 'Mind Field',
                 'season_number': 1,
                 'episode_number': 1,
@@ -1134,7 +1192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
                 'youtube_include_dash_manifest': False,
             },
-            'skip': 'not actual anymore',
+            'skip': 'not actual any more',
         },
         {
             # Youtube Music Auto-generated description
@@ -1191,8 +1249,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'IMG 3456',
                 'description': '',
                 'upload_date': '20170613',
-                'uploader_id': 'ElevageOrVert',
                 'uploader': 'ElevageOrVert',
+                'uploader_id': '@ElevageOrVert',
             },
             'params': {
                 'skip_download': True,
@@ -1210,8 +1268,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Part 77   Sort a list of simple types in c#',
                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
                 'upload_date': '20130831',
-                'uploader_id': 'kudvenkat',
                 'uploader': 'kudvenkat',
+                'uploader_id': '@Csharp-video-tutorialsBlogspot',
             },
             'params': {
                 'skip_download': True,
@@ -1263,8 +1321,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
                 'upload_date': '20201120',
                 'uploader': 'Walk around Japan',
-                'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
+                'uploader_id': '@walkaroundjapan7124',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@walkaroundjapan7124',
             },
             'params': {
                 'skip_download': True,
@@ -1276,11 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'info_dict': {
                 'id': '4L2J27mJ3Dc',
                 'ext': 'mp4',
+                'title': 'Midwest Squid Game #Shorts',
+                'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
                 'upload_date': '20211025',
                 'uploader': 'Charlie Berens',
-                'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
-                'uploader_id': 'fivedlrmilkshake',
-                'title': 'Midwest Squid Game #Shorts',
+                'uploader_id': '@CharlieBerens',
             },
             'params': {
                 'skip_download': True,
@@ -2088,25 +2146,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 thumbnails = [{'url': thumbnail}]
 
         category = microformat.get('category') or search_meta('genre')
-        channel_id = video_details.get('channelId') \
-            or microformat.get('externalChannelId') \
-            or search_meta('channelId')
+        channel_id = self._extract_channel_id(
+            webpage, videodetails=video_details, metadata=microformat)
         duration = int_or_none(
             video_details.get('lengthSeconds')
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
         is_live = video_details.get('isLive')
 
-        def gen_owner_profile_url():
-            yield microformat.get('ownerProfileUrl')
-            yield extract_attributes(self._search_regex(
-                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
-                get_element_by_attribute('itemprop', 'author', webpage),
-                'owner_profile_url', default='')).get('href')
+        owner_profile_url = self._yt_urljoin(self._extract_author_var(
+            webpage, 'url', videodetails=video_details, metadata=microformat))
 
-        owner_profile_url = next(
-            (x for x in map(url_or_none, gen_owner_profile_url()) if x),
-            None)
+        uploader = self._extract_author_var(
+            webpage, 'name', videodetails=video_details, metadata=microformat)
 
         if not player_url:
             player_url = self._extract_player_url(webpage)
@@ -2121,13 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'upload_date': unified_strdate(
                 microformat.get('uploadDate')
                 or search_meta('uploadDate')),
-            'uploader': video_details['author'],
-            'uploader_id': self._search_regex(
-                r'/(?:channel|user)/([^/?&#]+)', owner_profile_url,
-                'uploader id', fatal=False) if owner_profile_url else None,
-            'uploader_url': owner_profile_url,
+            'uploader': uploader,
             'channel_id': channel_id,
-            'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
             'duration': duration,
             'view_count': int_or_none(
                 video_details.get('viewCount')
@@ -2257,6 +2304,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 initial_data,
                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
                 list) or []
+            if not info['channel_id']:
+                channel_id = self._extract_channel_id('', renderers=contents)
+            if not info['uploader']:
+                info['uploader'] = self._extract_author_var('', 'name', renderers=contents)
+            if not owner_profile_url:
+                owner_profile_url = self._yt_urljoin(self._extract_author_var('', 'url', renderers=contents))
+
             for content in contents:
                 vpir = content.get('videoPrimaryInfoRenderer')
                 if vpir:
@@ -2304,10 +2358,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         })
                 vsir = content.get('videoSecondaryInfoRenderer')
                 if vsir:
-                    info['channel'] = get_text(try_get(
-                        vsir,
-                        lambda x: x['owner']['videoOwnerRenderer']['title'],
-                        dict))
                     rows = try_get(
                         vsir,
                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
@@ -2365,7 +2415,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         self.mark_watched(video_id, player_response)
 
-        return info
+        return merge_dicts(
+            info, {
+                'uploader_id': self._extract_uploader_id(owner_profile_url),
+                'uploader_url': owner_profile_url,
+                'channel_id': channel_id,
+                'channel_url': channel_id and self._yt_urljoin('/channel/' + channel_id),
+                'channel': info['uploader'],
+            })
 
 
 class YoutubeTabIE(YoutubeBaseInfoExtractor):
@@ -2394,6 +2451,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'description': 'Short clips from Super Cooper Sundays!',
             'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
             'title': 'Super Cooper Shorts - Shorts',
+            'uploader': 'Super Cooper Shorts',
+            'uploader_id': '@SuperCooperShorts',
         }
     }, {
         # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
@@ -2404,14 +2463,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Emergency Awesome - Home',
         },
         'playlist_mincount': 5,
+        'skip': 'new test page needed to replace `Emergency Awesome - Shorts`',
     }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Игорь Клейнер - Playlists',
+            'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+            'uploader': 'Igor Kleiner',
+            'uploader_id': '@IgorDataScience',
         },
     }, {
         # playlists, multipage, different order
@@ -2419,8 +2481,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Игорь Клейнер - Playlists',
+            'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+            'uploader': 'Igor Kleiner',
+            'uploader_id': '@IgorDataScience',
         },
     }, {
         # playlists, series
@@ -2430,6 +2494,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Playlists',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+            'uploader': '3Blue1Brown',
+            'uploader_id': '@3blue1brown',
         },
     }, {
         # playlists, singlepage
@@ -2439,6 +2505,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
             'title': 'ThirstForScience - Playlists',
             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+            'uploader': 'ThirstForScience',
+            'uploader_id': '@ThirstForScience',
         }
     }, {
         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
@@ -2447,20 +2515,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         # basic, single video playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
             'title': 'youtube-dl public playlist',
+            'uploader': 'Sergey M.',
+            'uploader_id': '@sergeym.6173',
+            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
         },
         'playlist_count': 1,
     }, {
         # empty playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
             'title': 'youtube-dl empty playlist',
+            'uploader': 'Sergey M.',
+            'uploader_id': '@sergeym.6173',
+            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
         },
         'playlist_count': 0,
     }, {
@@ -2470,6 +2540,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Home',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 2,
     }, {
@@ -2479,6 +2551,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 975,
     }, {
@@ -2488,6 +2562,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 199,
     }, {
@@ -2497,6 +2573,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Playlists',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 17,
     }, {
@@ -2506,6 +2584,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Community',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 18,
     }, {
@@ -2515,8 +2595,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Channels',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
-        'playlist_mincount': 138,
+        'playlist_mincount': 75,
     }, {
         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
@@ -2533,7 +2615,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': '29C3: Not my department',
             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
             'uploader': 'Christiaan008',
-            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+            'uploader_id': '@ChRiStIaAn008',
+            'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
         },
         'playlist_count': 96,
     }, {
@@ -2543,7 +2626,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Uploads from Cauchemar',
             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
             'uploader': 'Cauchemar',
-            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+            'uploader_id': '@Cauchemar89',
+            'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
         },
         'playlist_mincount': 1123,
     }, {
@@ -2557,7 +2641,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Uploads from Interstellar Movie',
             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
             'uploader': 'Interstellar Movie',
-            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
+            'uploader_id': '@InterstellarMovie',
+            'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincount': 21,
     }, {
@@ -2566,8 +2651,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'info_dict': {
             'title': 'Data Analysis with Dr Mike Pound',
             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
-            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
             'uploader': 'Computerphile',
+            'uploader_id': '@Computerphile',
+            'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
         },
         'playlist_mincount': 11,
     }, {
@@ -2605,14 +2691,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
         'info_dict': {
-            'id': '9Auq9mYxFEE',
+            'id': r're:[\da-zA-Z_-]{8,}',
             'ext': 'mp4',
-            'title': 'Watch Sky News live',
+            'title': r're:(?s)[A-Z].{20,}',
             'uploader': 'Sky News',
-            'uploader_id': 'skynews',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
-            'upload_date': '20191102',
-            'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+            'uploader_id': '@SkyNews',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@SkyNews',
+            'upload_date': r're:\d{8}',
+            'description': r're:(?s)(?:.*\n)+SUBSCRIBE to our YouTube channel for more videos: http://www\.youtube\.com/skynews *\n.*',
             'categories': ['News & Politics'],
             'tags': list,
             'like_count': int,
@@ -2701,34 +2787,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'note': 'Search tab',
         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
-        'playlist_mincount': 40,
+        'playlist_mincount': 20,
         'info_dict': {
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Search - linear algebra',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
             'uploader': '3Blue1Brown',
-            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
+            'uploader_id': '@3blue1brown',
+            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
         }
     }]
 
     @classmethod
     def suitable(cls, url):
-        return False if YoutubeIE.suitable(url) else super(
+        return not YoutubeIE.suitable(url) and super(
             YoutubeTabIE, cls).suitable(url)
 
-    def _extract_channel_id(self, webpage):
-        channel_id = self._html_search_meta(
-            'channelId', webpage, 'channel id', default=None)
-        if channel_id:
-            return channel_id
-        channel_url = self._html_search_meta(
-            ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
-             'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
-             'twitter:app:url:googleplay'), webpage, 'channel url')
-        return self._search_regex(
-            r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
-            channel_url, 'channel id')
-
     @staticmethod
     def _extract_grid_item_renderer(item):
         assert isinstance(item, dict)
@@ -3116,27 +3190,18 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         else:
             raise ExtractorError('Unable to find selected tab')
 
-    @staticmethod
-    def _extract_uploader(data):
+    def _extract_uploader(self, metadata, data):
         uploader = {}
-        sidebar_renderer = try_get(
-            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
-        if sidebar_renderer:
-            for item in sidebar_renderer:
-                if not isinstance(item, dict):
-                    continue
-                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
-                if not isinstance(renderer, dict):
-                    continue
-                owner = try_get(
-                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
-                if owner:
-                    uploader['uploader'] = owner.get('text')
-                    uploader['uploader_id'] = try_get(
-                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
-                    uploader['uploader_url'] = urljoin(
-                        'https://www.youtube.com/',
-                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+        renderers = traverse_obj(data,
+                                 ('sidebar', 'playlistSidebarRenderer', 'items'))
+        uploader['channel_id'] = self._extract_channel_id('', metadata=metadata, renderers=renderers)
+        uploader['uploader'] = (
+            self._extract_author_var('', 'name', renderers=renderers)
+            or self._extract_author_var('', 'name', metadata=metadata))
+        uploader['uploader_url'] = self._yt_urljoin(
+            self._extract_author_var('', 'url', metadata=metadata, renderers=renderers))
+        uploader['uploader_id'] = self._extract_uploader_id(uploader['uploader_url'])
+        uploader['channel'] = uploader['uploader']
         return uploader
 
     @staticmethod
@@ -3187,8 +3252,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             self._entries(selected_tab, item_id, webpage),
             playlist_id=playlist_id, playlist_title=title,
             playlist_description=description)
-        playlist.update(self._extract_uploader(data))
-        return playlist
+        return merge_dicts(playlist, self._extract_uploader(renderer, data))
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
         title = playlist.get('title') or try_get(
@@ -3275,8 +3339,9 @@ class YoutubePlaylistIE(InfoExtractor):
         'info_dict': {
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
             'id': 'PLBB231211A4F62143',
-            'uploader': 'Wickydoo',
-            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+            'uploader': 'Wickman',
+            'uploader_id': '@WickmanVT',
+            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
         },
         'playlist_mincount': 29,
     }, {
@@ -3290,21 +3355,25 @@ class YoutubePlaylistIE(InfoExtractor):
     }, {
         'note': 'embedded',
         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
-        'playlist_count': 4,
+        # TODO: full playlist requires _reload_with_unavailable_videos()
+        # 'playlist_count': 4,
+        'playlist_mincount': 1,
         'info_dict': {
             'title': 'JODA15',
             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
             'uploader': 'milan',
-            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+            'uploader_id': '@milan5503',
+            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
         }
     }, {
         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
-        'playlist_mincount': 982,
+        'playlist_mincount': 455,
         'info_dict': {
             'title': '2018 Chinese New Singles (11/6 updated)',
             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
             'uploader': 'LBK',
-            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+            'uploader_id': '@music_king',
+            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
         }
     }, {
         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
@@ -3342,8 +3411,8 @@ class YoutubeYtBeIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Small Scale Baler and Braiding Rugs',
             'uploader': 'Backus-Page House Museum',
-            'uploader_id': 'backuspagemuseum',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+            'uploader_id': '@backuspagemuseum',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
             'upload_date': '20161008',
             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
             'categories': ['Nonprofits & Activism'],

From 3da17834a49fad2a97c308fdd89aa26781ef4d60 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 28 Feb 2023 23:03:44 +0530
Subject: [PATCH 039/156] [Youtube] Construct dash formats with `range` query

See yt-dlp/yt_dlp#6369
---
 youtube_dl/extractor/youtube.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4246d84f9..89711c84e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1694,8 +1694,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if n_response is None:
                 # give up if descrambling failed
                 break
-            fmt['url'] = update_url(
-                parsed_fmt_url, query_update={'n': [n_response]})
+            for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
+                fmt_dct['url'] = update_url(
+                    fmt_dct['url'], query_update={'n': [n_response]})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -2047,10 +2048,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if no_video:
                 dct['abr'] = tbr
             if no_audio or no_video:
-                dct['downloader_options'] = {
-                    # Youtube throttles chunks >~10M
-                    'http_chunk_size': 10485760,
-                }
+                CHUNK_SIZE = 10 << 20
+                # avoid Youtube throttling
+                dct.update({
+                    'protocol': 'http_dash_segments',
+                    'fragments': [{
+                        'url': update_url_query(dct['url'], {
+                            'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
+                        })
+                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
+                } if dct['filesize'] else {
+                    'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
+                })
+
                 if dct.get('ext'):
                     dct['container'] = dct['ext'] + '_dash'
             formats.append(dct)

From 3e92c60fcd94c37428d57153dbdd14cd0a1f9226 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 3 Mar 2023 16:48:54 +0530
Subject: [PATCH 040/156] [jsinterp] Handle `Date` at epoch 0

See yt-dlp/yt_dlp#6400
---
 test/test_youtube_signature.py | 4 ++++
 youtube_dl/jsinterp.py         | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index ac37ffa45..decf7ee38 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -67,6 +67,10 @@ _SIG_TESTS = [
 ]
 
 _NSIG_TESTS = [
+    (
+        'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
+        'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
+    ),
     (
         'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
         'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a3bc42a61..e28670a3f 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -405,7 +405,7 @@ class JSInterpreter(object):
                 left, right = self._separate_at_paren(obj[len(klass):])
                 argvals = self.interpret_iter(left, local_vars, allow_recursion)
                 expr = konstr(*argvals)
-                if not expr:
+                if expr is None:
                     raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
                 expr = self._dump(expr, local_vars) + right
                 break

From 040271022709c4d20d33c604d1dbc72dc2da472d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Mar 2023 23:07:07 +0000
Subject: [PATCH 041/156] [jsinterp] Fix regexp parsing and .replace[All]
 method

 * For performance, make regexp object instantiation lazy
 * Other small performance improvements
---
 test/test_jsinterp.py  | 46 ++++++++++++++++++-----
 youtube_dl/jsinterp.py | 84 ++++++++++++++++++++++++++++--------------
 2 files changed, 93 insertions(+), 37 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index b5962356c..5d129433d 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -139,21 +139,16 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
     def test_Date(self):
-        jsi = JSInterpreter('''
-        function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 86000)
-
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
         # date format m/d/y
-        jsi = JSInterpreter('''
-        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 86000)
+        self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000)
+
+        # epoch 0
+        self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0)
 
     def test_call(self):
         jsi = JSInterpreter('''
@@ -445,7 +440,7 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertIs(jsi.call_function('x'), None)
 
         jsi = JSInterpreter('''
-        function x() { let a=/,,[/,913,/](,)}/; return a; }
+        function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
         ''')
         attrs = set(('findall', 'finditer', 'flags', 'groupindex',
                      'groups', 'match', 'pattern', 'scanner',
@@ -457,6 +452,31 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace("data-", ""); return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(/^.+-/, ""); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(/a/g, "o"); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'doto-nome')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replaceAll("a", "o"); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'doto-nome')
+
         jsi = JSInterpreter(r'''
         function x() { let a=[/[)\\]/]; return a[0]; }
         ''')
@@ -485,6 +505,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    """ # fails so far
+    def test_packed(self):
+        jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
+        self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
+    """
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index e28670a3f..ab7d6f926 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -12,9 +12,11 @@ from .utils import (
     js_to_json,
     remove_quotes,
     unified_timestamp,
+    variadic,
 )
 from .compat import (
     compat_basestring,
+    compat_chr,
     compat_collections_chain_map as ChainMap,
     compat_itertools_zip_longest as zip_longest,
     compat_str,
@@ -205,10 +207,10 @@ class JSInterpreter(object):
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     class JS_RegExp(object):
-        _RE_FLAGS = {
+        RE_FLAGS = {
             # special knowledge: Python's re flags are bitmask values, current max 128
             # invent new bitmask values well above that for literal parsing
-            # TODO: new pattern class to execute matches with these flags
+            # TODO: execute matches with these flags (remaining: d, y)
             'd': 1024,  # Generate indices for substring matches
             'g': 2048,  # Global search
             'i': re.I,  # Case-insensitive search
@@ -218,12 +220,19 @@ class JSInterpreter(object):
             'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
         }
 
-        def __init__(self, pattern_txt, flags=''):
+        def __init__(self, pattern_txt, flags=0):
             if isinstance(flags, compat_str):
                 flags, _ = self.regex_flags(flags)
-            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
             # First, avoid https://github.com/python/cpython/issues/74534
-            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            self.__self = None
+            self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
+            self.__flags = flags
+
+        def __instantiate(self):
+            if self.__self:
+                return
+            self.__self = re.compile(self.__pattern_txt, self.__flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
             for name in dir(self.__self):
                 # Only these? Obviously __class__, __init__.
                 # PyPy creates a __weakref__ attribute with value None
@@ -232,15 +241,21 @@ class JSInterpreter(object):
                     continue
                 setattr(self, name, getattr(self.__self, name))
 
+        def __getattr__(self, name):
+            self.__instantiate()
+            if hasattr(self, name):
+                return getattr(self, name)
+            return super(JSInterpreter.JS_RegExp, self).__getattr__(name)
+
         @classmethod
         def regex_flags(cls, expr):
             flags = 0
             if not expr:
                 return flags, expr
             for idx, ch in enumerate(expr):
-                if ch not in cls._RE_FLAGS:
+                if ch not in cls.RE_FLAGS:
                     break
-                flags |= cls._RE_FLAGS[ch]
+                flags |= cls.RE_FLAGS[ch]
             return flags, expr[idx + 1:]
 
     @classmethod
@@ -265,17 +280,17 @@ class JSInterpreter(object):
         counters = dict((k, 0) for k in _MATCHING_PARENS.values())
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
-        after_op, in_regex_char_group, skip_re = True, False, 0
+        after_op, in_regex_char_group = True, False
 
         for idx, char in enumerate(expr):
-            if skip_re > 0:
-                skip_re -= 1
-                continue
+            paren_delta = 0
             if not in_quote:
                 if char in _MATCHING_PARENS:
                     counters[_MATCHING_PARENS[char]] += 1
+                    paren_delta = 1
                 elif char in counters:
                     counters[char] -= 1
+                    paren_delta = -1
             if not escaping:
                 if char in _QUOTES and in_quote in (char, None):
                     if in_quote or after_op or char != '/':
@@ -283,7 +298,7 @@ class JSInterpreter(object):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace()))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -293,7 +308,7 @@ class JSInterpreter(object):
                 continue
             elif pos == 0 and skip_delims:
                 here = expr[idx:]
-                for s in skip_delims if isinstance(skip_delims, (list, tuple)) else [skip_delims]:
+                for s in variadic(skip_delims):
                     if here.startswith(s) and s:
                         skipping = len(s) - 1
                         break
@@ -316,7 +331,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -361,6 +376,20 @@ class JSInterpreter(object):
         except TypeError:
             return self._named_object(namespace, obj)
 
+    # used below
+    _VAR_RET_THROW_RE = re.compile(r'''(?x)
+        (?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
+        ''')
+    _COMPOUND_RE = re.compile(r'''(?x)
+        (?P<try>try)\s*\{|
+        (?P<if>if)\s*\(|
+        (?P<switch>switch)\s*\(|
+        (?P<for>for)\s*\(|
+        (?P<while>while)\s*\(
+        ''')
+    _FINALLY_RE = re.compile(r'finally\s*\{')
+    _SWITCH_RE = re.compile(r'switch\s*\(')
+
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise self.Exception('Recursion limit reached')
@@ -375,7 +404,7 @@ class JSInterpreter(object):
             if should_return:
                 return ret, should_return
 
-        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
+        m = self._VAR_RET_THROW_RE.match(stmt)
         if m:
             expr = stmt[len(m.group(0)):].strip()
             if m.group('throw'):
@@ -447,13 +476,7 @@ class JSInterpreter(object):
                 for item in self._separate(inner)])
             expr = name + outer
 
-        m = re.match(r'''(?x)
-                (?P<try>try)\s*\{|
-                (?P<if>if)\s*\(|
-                (?P<switch>switch)\s*\(|
-                (?P<for>for)\s*\(|
-                (?P<while>while)\s*\(
-                ''', expr)
+        m = self._COMPOUND_RE.match(expr)
         md = m.groupdict() if m else {}
         if md.get('if'):
             cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
@@ -512,7 +535,7 @@ class JSInterpreter(object):
                     err = None
                     pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
 
-            m = re.match(r'finally\s*\{', expr)
+            m = self._FINALLY_RE.match(expr)
             if m:
                 sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                 ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
@@ -531,7 +554,7 @@ class JSInterpreter(object):
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
-                switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
+                switch_m = self._SWITCH_RE.match(remaining)  # FIXME
                 if switch_m:
                     switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                     body, expr = self._separate_at_paren(remaining, '}')
@@ -735,7 +758,7 @@ class JSInterpreter(object):
                 if obj == compat_str:
                     if member == 'fromCharCode':
                         assertion(argvals, 'takes one or more arguments')
-                        return ''.join(map(chr, argvals))
+                        return ''.join(map(compat_chr, argvals))
                     raise self.Exception('Unsupported string method ' + member, expr=expr)
                 elif obj == float:
                     if member == 'pow':
@@ -808,10 +831,17 @@ class JSInterpreter(object):
                     if idx >= len(obj):
                         return None
                     return ord(obj[idx])
-                elif member == 'replace':
+                elif member in ('replace', 'replaceAll'):
                     assertion(isinstance(obj, compat_str), 'must be applied on a string')
                     assertion(len(argvals) == 2, 'takes exactly two arguments')
-                    return re.sub(argvals[0], argvals[1], obj)
+                    # TODO: argvals[1] callable, other Py vs JS edge cases
+                    if isinstance(argvals[0], self.JS_RegExp):
+                        count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1
+                        assertion(member != 'replaceAll' or count == 0,
+                                  'replaceAll must be called with a global RegExp')
+                        return argvals[0].sub(argvals[1], obj, count=count)
+                    count = ('replaceAll', 'replace').index(member)
+                    return re.sub(re.escape(argvals[0]), argvals[1], obj, count=count)
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)

From 27d41d73655b8fbf2dedf88cac96220520d526b5 Mon Sep 17 00:00:00 2001
From: Sophira <github@theblob.org>
Date: Tue, 7 Mar 2023 15:49:31 +0000
Subject: [PATCH 042/156] [doc] Recommend "Get cookies.txt LOCALLY" extension
 in README.md (#31763)

* remove link to suspect "Get cookies.txt" extension, dropped from Chrome store
* link to new Manifest V3-compatible open-source "Get cookies.txt LOCALLY" extension.

Fixes #31465.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6e07ddb1c..227e34046 100644
--- a/README.md
+++ b/README.md
@@ -918,7 +918,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
 
 Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
 
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
+In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
 
 Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
 

From 8c86fd33dca48ebb505ed04150d9e35993b9fe7e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 9 Mar 2023 16:40:30 +0000
Subject: [PATCH 043/156] [doc] Improve "guidance" on bug reporting

---
 README.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 227e34046..14a3d6c86 100644
--- a/README.md
+++ b/README.md
@@ -1408,7 +1408,11 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported in the issue tracker: <https://github.com/ytdl-org/youtube-dl/issues> (<https://yt-dl.org/bug> is an alias for this). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+
+## Opening a bug report or suggestion
+
+Be sure to follow instructions provided **below** and **in the issue tracker**. Complete the appropriate issue template fully. Consider whether your problem is covered by an existing issue: if so, follow the discussion there. Avoid commenting on existing duplicate issues as such comments do not add to the discussion of the issue and are liable to be treated as spam.
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
@@ -1428,17 +1432,17 @@ $ youtube-dl -v <your command line>
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
-Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+Finally, please review your issue to avoid the various common mistakes listed below (you can and should use this as a checklist).
 
 ### Is the description of the issue itself sufficient?
 
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+We often get issue reports that are hard to understand. To avoid subsequent clarifications, and to assist participants who are not native English speakers, please elaborate on what feature you are requesting, or what bug you want to be fixed.
 
-So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+Make sure that it's obvious
 
 - What the problem is
 - How it could be fixed
-- How your proposed solution would look like
+- How your proposed solution would look
 
 If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
@@ -1448,14 +1452,14 @@ If your server has multiple IPs or you suspect censorship, adding `--call-home`
 
 **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
+###  Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. Initially, at least, use the search term `-label:duplicate` to focus on active issues. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
 ###  Are you using the latest version?
 
 Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
 
-###  Is the issue already documented?
-
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
-
 ###  Why are existing options not enough?
 
 Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.

From 5c985d4f81a43ada75dafb23233e7fe39913907a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Mar 2023 12:09:55 +0000
Subject: [PATCH 044/156] [downloader] Let _ffmpeg_ handle DASH segments

Fixes https://github.com/ytdl-org/youtube-dl/issues/31792 after 3da1783.
---
 youtube_dl/downloader/external.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index bffcd10b6..1b6bd1fa2 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -273,7 +273,7 @@ class HttpieFD(ExternalFD):
 class FFmpegFD(ExternalFD):
     @classmethod
     def supports(cls, info_dict):
-        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
 
     @classmethod
     def available(cls):

From baa6c5e95cb307e7d716645780ff8aef22de6aca Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Mar 2023 12:17:00 +0000
Subject: [PATCH 045/156] [FragmentFD] Respect `--no-continue`

* discard partial fragment on `--no-continue`
* continue with correct progress display otherwise

Resolves #21467
---
 youtube_dl/downloader/common.py   | 24 +++++++++++-----
 youtube_dl/downloader/dash.py     | 10 +++----
 youtube_dl/downloader/fragment.py | 46 +++++++++++++++++++++----------
 youtube_dl/downloader/http.py     | 15 ++++------
 4 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 1cdba89cd..c86ce2aa5 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -88,17 +88,21 @@ class FileDownloader(object):
             return '---.-%'
         return '%6s' % ('%3.1f%%' % percent)
 
-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+        if len(args) < 2:
+            rate, remaining = (start_or_rate, now_or_remaining)
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+        start, now = (start_or_rate, now_or_remaining)
+        total, current = args
         if total is None:
             return None
         if now is None:
             now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)
 
     @staticmethod
     def format_eta(eta):
@@ -123,6 +127,12 @@ class FileDownloader(object):
     def format_retries(retries):
         return 'inf' if retries == float('inf') else '%.0f' % retries
 
+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        fn = encodeFilename(unencoded_filename)
+        if os.path.isfile(fn):
+            return os.path.getsize(fn)
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index c6d674bc6..cc30485f8 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -38,8 +38,7 @@ class DashSegmentsFD(FragmentFD):
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = i == 0 or not skip_unavailable_fragments
-            count = 0
-            while count <= fragment_retries:
+            for count in range(fragment_retries + 1):
                 try:
                     fragment_url = fragment.get('url')
                     if not fragment_url:
@@ -57,9 +56,8 @@ class DashSegmentsFD(FragmentFD):
                     # is usually enough) thus allowing to download the whole file successfully.
                     # To be future-proof we will retry all fragments that fail with any
                     # HTTP error.
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                    if count < fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
                 except DownloadError:
                     # Don't retry fragment if error occurred during HTTP downloading
                     # itself since it has own retry settings
@@ -68,7 +66,7 @@ class DashSegmentsFD(FragmentFD):
                         break
                     raise
 
-            if count > fragment_retries:
+            if count >= fragment_retries:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
                     continue
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 35c76feba..913e91b64 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
 
     @staticmethod
     def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+        return ctx['live'] is not True and ctx['tmpfilename'] != '-'
 
     def _read_ytdl_file(self, ctx):
         assert 'ytdl_corrupt' not in ctx
@@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
             'url': frag_url,
             'http_headers': headers or info_dict.get('http_headers'),
         }
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(
+                self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = frag_resume_len
+        ctx['frag_resume_len'] = frag_resume_len or 0
+
         success = ctx['dl'].download(fragment_filename, fragment_info_dict)
         if not success:
             return False, None
@@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
             total_frags_str = '%d' % ctx['total_frags']
             ad_frags = ctx.get('ad_frags', 0)
             if ad_frags:
@@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
         self.to_screen(
             '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
         self.report_destination(ctx['filename'])
+        continuedl = self.params.get('continuedl', True)
         dl = HttpQuietDownloader(
             self.ydl,
             {
-                'continuedl': True,
+                'continuedl': continuedl,
                 'quiet': True,
                 'noprogress': True,
                 'ratelimit': self.params.get('ratelimit'),
@@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
         )
         tmpfilename = self.temp_name(ctx['filename'])
         open_mode = 'wb'
-        resume_len = 0
 
         # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename) or 0
+        if resume_len > 0:
             open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
 
         # Should be initialized before ytdl file check
         ctx.update({
@@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
         })
 
         if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+            if continuedl and ytdl_file_exists:
                 self._read_ytdl_file(ctx)
                 is_corrupt = ctx.get('ytdl_corrupt') is True
                 is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
                     if 'ytdl_corrupt' in ctx:
                         del ctx['ytdl_corrupt']
                     self._write_ytdl_file(ctx)
+
             else:
+                if not continuedl:
+                    if ytdl_file_exists:
+                        self._read_ytdl_file(ctx)
+                    ctx['fragment_index'] = resume_len = 0
                 self._write_ytdl_file(ctx)
                 assert ctx['fragment_index'] == 0
 
@@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
         start = time.time()
         ctx.update({
             'started': start,
+            'fragment_started': start,
             # Amount of fragment's bytes downloaded by the time of the previous
             # frag progress hook invocation
             'prev_frag_downloaded_bytes': 0,
@@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
             if s['status'] not in ('downloading', 'finished'):
                 return
 
+            if not total_frags and ctx.get('fragment_count'):
+                state['fragment_count'] = ctx['fragment_count']
+
             time_now = time.time()
             state['elapsed'] = time_now - start
             frag_total_bytes = s.get('total_bytes') or 0
@@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
                 ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_total_bytes)
+                ctx['fragment_started'] = time.time()
                 ctx['prev_frag_downloaded_bytes'] = 0
             else:
                 frag_downloaded_bytes = s['downloaded_bytes']
                 state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
                 if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
-                state['speed'] = s.get('speed') or ctx.get('speed')
-                ctx['speed'] = state['speed']
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
@@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
                         os.utime(ctx['filename'], (time.time(), filetime))
                     except Exception:
                         pass
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
 
         self._hook_progress({
             'downloaded_bytes': downloaded_bytes,
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index d8ac41dcc..440471aa0 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
-                ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+            ctx.resume_len = info_dict.get('frag_resume_len')
+            if ctx.resume_len is None:
+                ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
 
         ctx.is_resume = ctx.resume_len > 0
 
@@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
                         raise RetryDownload(err)
                     raise err
                 # When trying to resume, Content-Range HTTP header of response has to be checked
-                # to match the value of requested Range HTTP header. This is due to a webservers
+                # to match the value of requested Range HTTP header. This is due to webservers
                 # that don't support resuming and serve a whole file with no Content-Range
-                # set in response despite of requested Range (see
+                # set in response despite requested Range (see
                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                 if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
@@ -293,10 +293,7 @@ class HttpFD(FileDownloader):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if ctx.data_len is None:
-                    eta = None
-                else:
-                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
 
                 self._hook_progress({
                     'status': 'downloading',

From e8de54bce50f6f77a4d7e8e80675f7003d5bf630 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 13 Mar 2023 19:45:54 +0000
Subject: [PATCH 046/156] [core] Handle `/../` sequences in HTTP URLs

* use Python's RFC implementation for embedded sequences
* hack: strip unbalanced leading `../` from path, like eg Firefox

See https://github.com/yt-dlp/yt-dlp/issues/3355
---
 youtube_dl/YoutubeDL.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 8e8546596..bcf781744 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,7 @@ from .compat import (
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
 )
@@ -60,6 +61,7 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     ISO3166Utils,
     locked_file,
@@ -74,6 +76,7 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
+    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2297,6 +2300,27 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, as e.g. Firefox does
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+            # get a new Request with the munged URL
+            if url != req.get_full_url():
+                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                    req.get_method(), compat_urllib_request.Request)
+                req = req_type(
+                    url, data=req.data, headers=dict(req.header_items()),
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):

From 70ff01391068c98b4377c5cc17a8d00d5645e734 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Mar 2023 00:58:59 +0000
Subject: [PATCH 047/156] [devscripts] Add a hack to convert command-line
 options to API options

---
 devscripts/cli_to_api.py | 64 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100755 devscripts/cli_to_api.py

diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
new file mode 100755
index 000000000..2f4d6a458
--- /dev/null
+++ b/devscripts/cli_to_api.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+"""
+This script displays the API parameters corresponding to a yt-dl command line
+
+Example:
+$ ./cli_to_api.py -f best
+{u'format': 'best'}
+$
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import youtube_dl
+from types import MethodType
+
+
+def cli_to_api(*opts):
+    YDL = youtube_dl.YoutubeDL
+
+    # to extract the parsed options, break out of YoutubeDL instantiation
+
+    # return options via this Exception
+    class ParseYTDLResult(Exception):
+        def __init__(self, result):
+            super(ParseYTDLResult, self).__init__('result')
+            self.opts = result
+
+    # replacement constructor that raises ParseYTDLResult
+    def ytdl_init(ydl, ydl_opts):
+        super(YDL, ydl).__init__(ydl_opts)
+        raise ParseYTDLResult(ydl_opts)
+
+    # patch in the constructor
+    YDL.__init__ = MethodType(ytdl_init, YDL)
+
+    # core parser
+    def parsed_options(argv):
+        try:
+            youtube_dl._real_main(list(argv))
+        except ParseYTDLResult as result:
+            return result.opts
+
+    # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
+    default = parsed_options([])
+    diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
+    if 'postprocessors' in diff:
+        diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
+    return diff
+
+
+def main():
+    from pprint import pprint
+    pprint(cli_to_api(*sys.argv))
+
+
+if __name__ == '__main__':
+    main()

From 6fece0a96b3cd8677f5c1185a57c6e21403fcb44 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Mar 2023 13:01:32 +0000
Subject: [PATCH 048/156] [AENetworksBaseIE] Report missing show data instead
 of crash

---
 youtube_dl/extractor/aenetworks.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 2a1f08e39..59fbe048a 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -8,6 +8,8 @@ from ..utils import (
     ExtractorError,
     GeoRestrictedError,
     int_or_none,
+    remove_start,
+    traverse_obj,
     update_url_query,
     urlencode_postdata,
 )
@@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
     }
 
     def _extract_aen_smil(self, smil_url, video_id, auth=None):
-        query = {'mbr': 'true'}
+        query = {
+            'mbr': 'true',
+            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+        }
         if auth:
             query['auth'] = auth
         TP_SMIL_QUERY = [{
             'assetTypes': 'high_video_ak',
-            'switch': 'hls_high_ak'
+            'switch': 'hls_high_ak',
         }, {
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_s3',
         }, {
             'assetTypes': 'high_video_s3',
             'switch': 'hls_high_fastly',
@@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
         requestor_id, brand = self._DOMAIN_MAP[domain]
         result = self._download_json(
             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
         title = result['title']
         video_id = result['id']
         media_url = result['publicUrl']
@@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
     }, {
         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
         'info_dict': {
@@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
     }, {
         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
         'only_matching': True

From 45495228b7a6728b7e764bbcf1f38490cd3d8697 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Mar 2023 00:51:44 +0000
Subject: [PATCH 049/156] [downloader/http] Only check for resumability when
 actually resuming

---
 test/test_downloader_http.py  | 2 +-
 youtube_dl/downloader/http.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 4e6d7a2a0..6af86ae48 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -88,7 +88,7 @@ class TestHttpFD(unittest.TestCase):
         self.assertTrue(downloader.real_download(filename, {
             'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
         }))
-        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
         try_rm(encodeFilename(filename))
 
     def download_all(self, params):
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 440471aa0..28a49b9e8 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -141,7 +141,8 @@ class HttpFD(FileDownloader):
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
-                    self.report_unable_to_resume()
+                    if range_start > 0:
+                        self.report_unable_to_resume()
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
                 ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))

From f35b757c826027ab5263d431bbe363c6403bd66d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Mar 2023 02:27:46 +0000
Subject: [PATCH 050/156] [utils] Ensure `allow_types` for `variadic()` is a
 tuple

---
 test/test_utils.py  | 1 +
 youtube_dl/utils.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index ea2b96ed2..b85d397d0 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1563,6 +1563,7 @@ Line 1
         self.assertEqual(variadic(None), (None, ))
         self.assertEqual(variadic('spam'), ('spam', ))
         self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+        self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
 
     def test_traverse_obj(self):
         _TEST_DATA = {
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 761edcd49..f3c7af437 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4213,6 +4213,8 @@ def multipart_encode(data, boundary=None):
 
 
 def variadic(x, allowed_types=(compat_str, bytes, dict)):
+    if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable):
+        allowed_types = tuple(allowed_types)
     return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
 
 

From 88f28f620bcae7ba7302f8b049b74f0f8a12831f Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 12 Mar 2023 14:46:09 +0530
Subject: [PATCH 051/156] [extractor/youtube] Construct fragment list lazily

Ref: yt-dlp/yt-dlp/commit/e389d17
See: yt-dlp/yt-dlp#6517
---
 youtube_dl/extractor/youtube.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 89711c84e..6b153193c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     get_element_by_attribute,
     int_or_none,
     js_to_json,
+    LazyList,
     merge_dicts,
     mimetype2ext,
     parse_codecs,
@@ -1986,9 +1987,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         itags = []
         itag_qualities = {}
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
+        CHUNK_SIZE = 10 << 20
+
         streaming_data = player_response.get('streamingData') or {}
         streaming_formats = streaming_data.get('formats') or []
         streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
+
+        def build_fragments(f):
+            return LazyList({
+                'url': update_url_query(f['url'], {
+                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
+                })
+            } for range_start in range(0, f['filesize'], CHUNK_SIZE))
+
         for fmt in streaming_formats:
             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                 continue
@@ -2048,15 +2059,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if no_video:
                 dct['abr'] = tbr
             if no_audio or no_video:
-                CHUNK_SIZE = 10 << 20
                 # avoid Youtube throttling
                 dct.update({
                     'protocol': 'http_dash_segments',
-                    'fragments': [{
-                        'url': update_url_query(dct['url'], {
-                            'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
-                        })
-                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
+                    'fragments': build_fragments(dct),
                 } if dct['filesize'] else {
                     'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
                 })

From 3f6d2bd76f3393eef90896dfabc2d8dde37c2009 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 9 Mar 2023 22:09:23 +0530
Subject: [PATCH 052/156] [extractor/youtube] Bypass throttling for `-f17`

and related cleanup

Thanks @AudricV for the finding

Ref: yt-dlp/yt-dlp/commit/c9abebb
---
 youtube_dl/extractor/youtube.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6b153193c..ae3416b20 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2052,13 +2052,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if mobj:
                     dct['ext'] = mimetype2ext(mobj.group(1))
                     dct.update(parse_codecs(mobj.group(2)))
-            no_audio = dct.get('acodec') == 'none'
-            no_video = dct.get('vcodec') == 'none'
-            if no_audio:
-                dct['vbr'] = tbr
-            if no_video:
-                dct['abr'] = tbr
-            if no_audio or no_video:
+            single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
+            if single_stream and dct.get('ext'):
+                dct['container'] = dct['ext'] + '_dash'
+            if single_stream or itag == '17':
                 # avoid Youtube throttling
                 dct.update({
                     'protocol': 'http_dash_segments',
@@ -2067,8 +2064,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
                 })
 
-                if dct.get('ext'):
-                    dct['container'] = dct['ext'] + '_dash'
             formats.append(dct)
 
         hls_manifest_url = streaming_data.get('hlsManifestUrl')

From cdf40b6aa651d949ce01e9bec1a11f792e8af899 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 3 Apr 2023 21:07:10 +0100
Subject: [PATCH 053/156] [test] Update tests for Ubuntu 20.04 * 18.04 test
 runner was withdrawn * for now, disable Py 3.3/3.4 tests

---
 .github/workflows/ci.yml | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a609f3704..51abdce1d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,9 +7,10 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        os: [ubuntu-18.04]
+        os: [ubuntu-20.04]
         # TODO: python 2.6
-        python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        # TODO: restore support for 3.3, 3.4
+        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-impl: [cpython]
         ytdl-test-set: [core, download]
         run-tests-ext: [sh]
@@ -26,26 +27,27 @@ jobs:
           ytdl-test-set: download
           run-tests-ext: bat
         # jython
-        - os: ubuntu-18.04
+        - os: ubuntu-20.04
           python-impl: jython
           ytdl-test-set: core
           run-tests-ext: sh
-        - os: ubuntu-18.04
+        - os: ubuntu-20.04
           python-impl: jython
           ytdl-test-set: download
           run-tests-ext: sh
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      if: ${{ matrix.python-impl == 'cpython' }}
+    - uses: actions/checkout@v3
+    - name: Set up supported Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }}
       with:
         python-version: ${{ matrix.python-version }}
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
-      uses: actions/setup-java@v1
+      uses: actions/setup-java@v2
       with:
         java-version: 8
+        distribution: 'zulu'
     - name: Install Jython
       if: ${{ matrix.python-impl == 'jython' }}
       run: |
@@ -70,9 +72,9 @@ jobs:
     name: Linter
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9
     - name: Install flake8

From 557dbac173c30a51acd284b46f2d5460e539f51a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:29:24 +0100
Subject: [PATCH 054/156] [FragmentFD] Fix iteration with infinite limit

* fixes ytdl-org/youtube-dl/baa6c5e
* resolves #31885
---
 youtube_dl/downloader/dash.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index cc30485f8..67a8e173f 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import itertools
+
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
 from ..utils import (
@@ -30,15 +32,13 @@ class DashSegmentsFD(FragmentFD):
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        frag_index = 0
-        for i, fragment in enumerate(fragments):
-            frag_index += 1
+        for frag_index, fragment in enumerate(fragments, 1):
             if frag_index <= ctx['fragment_index']:
                 continue
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
-            fatal = i == 0 or not skip_unavailable_fragments
-            for count in range(fragment_retries + 1):
+            fatal = frag_index == 1 or not skip_unavailable_fragments
+            for count in itertools.count():
                 try:
                     fragment_url = fragment.get('url')
                     if not fragment_url:
@@ -48,7 +48,6 @@ class DashSegmentsFD(FragmentFD):
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)
-                    break
                 except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
                     # whole download to fail. However if the same fragment is immediately
@@ -58,13 +57,14 @@ class DashSegmentsFD(FragmentFD):
                     # HTTP error.
                     if count < fragment_retries:
                         self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
+                        continue
                 except DownloadError:
                     # Don't retry fragment if error occurred during HTTP downloading
-                    # itself since it has own retry settings
-                    if not fatal:
-                        self.report_skip_fragment(frag_index)
-                        break
-                    raise
+                    # itself since it has its own retry settings
+                    if fatal:
+                        raise
+                    self.report_skip_fragment(frag_index)
+                break
 
             if count >= fragment_retries:
                 if not fatal:

From 78da22489b483988e198a8352893df9c6cf34032 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:39:54 +0100
Subject: [PATCH 055/156] [compat] Add and use `compat_open()` like Py3
 `open()`

* resolves FIXME: ytdl-org/youtube-dl/commit/dfe5fa4
---
 youtube_dl/compat.py  | 11 +++++++++++
 youtube_dl/options.py |  6 ++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 39551f810..fe62caf80 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3127,6 +3127,16 @@ else:
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
 
+if sys.version_info < (3, 0):
+    # open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
+    def compat_open(file_, *args, **kwargs):
+        if len(args) > 6 or 'opener' in kwargs:
+            raise ValueError('open: unsupported argument "opener"')
+        return io.open(file_, *args, **kwargs)
+else:
+    compat_open = open
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3185,6 +3195,7 @@ __all__ = [
     'compat_kwargs',
     'compat_map',
     'compat_numeric_types',
+    'compat_open',
     'compat_ord',
     'compat_os_name',
     'compat_os_path_expanduser',
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f6d2b0898..7b059b51e 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -11,6 +11,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_getenv,
     compat_kwargs,
+    compat_open as open,
     compat_shlex_split,
 )
 from .utils import (
@@ -41,14 +42,11 @@ def _hide_login_info(opts):
 def parseOpts(overrideArguments=None):
     def _readOptions(filename_bytes, default=[]):
         try:
-            optionf = open(filename_bytes)
+            optionf = open(filename_bytes, encoding=preferredencoding())
         except IOError:
             return default  # silently skip if file is not present
         try:
-            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
             contents = optionf.read()
-            if sys.version_info < (3,):
-                contents = contents.decode(preferredencoding())
             res = compat_shlex_split(contents, comments=True)
         finally:
             optionf.close()

From 25124bd640acf2fbae71b2a52738ee41da548fb1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:47:49 +0100
Subject: [PATCH 056/156] [devscripts] Improve hack to convert command-line
 options to API options

* define equality for DateRange
* don't show default DateRange
---
 devscripts/cli_to_api.py | 25 ++++++++++++++++++++++---
 youtube_dl/utils.py      |  4 ++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
index 2f4d6a458..9fb1d2ba8 100755
--- a/devscripts/cli_to_api.py
+++ b/devscripts/cli_to_api.py
@@ -49,15 +49,34 @@ def cli_to_api(*opts):
 
     # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
     default = parsed_options([])
-    diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
+
+    def neq_opt(a, b):  # True if value `b` should be reported as differing from default `a`
+        if a == b:
+            return False
+        if a is None and repr(type(b)).endswith(".utils.DateRange'>"):  # inspect the value `b`, not the builtin `object`
+            return '0001-01-01 - 9999-12-31' != '{0}'.format(b)  # string form of the default DateRange
+        return a != b
+
+    diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v))
     if 'postprocessors' in diff:
         diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
     return diff
 
 
 def main():
-    from pprint import pprint
-    pprint(cli_to_api(*sys.argv))
+    from pprint import PrettyPrinter
+
+    pprint = PrettyPrinter()
+    super_format = pprint.format
+
+    def format(object, context, maxlevels, level):
+        if repr(type(object)).endswith(".utils.DateRange'>"):
+            return '{0}: {1}>'.format(repr(object)[:-2], object), True, False
+        return super_format(object, context, maxlevels, level)
+
+    pprint.format = format
+
+    pprint.pprint(cli_to_api(*sys.argv))
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index f3c7af437..d80ceb007 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3190,6 +3190,10 @@ class DateRange(object):
     def __str__(self):
         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
 
+    def __eq__(self, other):
+        return (isinstance(other, DateRange)
+                and self.start == other.start and self.end == other.end)
+
 
 def platform_name():
     """ Returns the platform name as a compat_str """

From 9f4d83ff4255d8840c0fa9b367722c129ebecdb2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:50:25 +0100
Subject: [PATCH 057/156] [options] Add --mtime option, unsets default
 --no-mtime

* resolves #1709 (!)
---
 youtube_dl/options.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 7b059b51e..d802b7e59 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -731,9 +731,13 @@ def parseOpts(overrideArguments=None):
         '--no-part',
         action='store_true', dest='nopart', default=False,
         help='Do not use .part files - write directly into output file')
+    filesystem.add_option(
+        '--mtime',
+        action='store_true', dest='updatetime', default=True,
+        help='Use the Last-modified header to set the file modification time (default)')
     filesystem.add_option(
         '--no-mtime',
-        action='store_false', dest='updatetime', default=True,
+        action='store_false', dest='updatetime',
         help='Do not use the Last-modified header to set the file modification time')
     filesystem.add_option(
         '--write-description',

From d6ae3b77cd50083ef245c28f904ee0b70a77d5c6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 14:11:18 +0100
Subject: [PATCH 058/156] [core] Avoid deepcopy of ctx dict (fix f35b757)

* may now contain `LazyList`s
* resolves #31999
---
 youtube_dl/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index bcf781744..2c0d4926c 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1399,7 +1399,7 @@ class YoutubeDL(object):
             filters = [self._build_format_filter(f) for f in selector.filters]
 
             def final_selector(ctx):
-                ctx_copy = copy.deepcopy(ctx)
+                ctx_copy = dict(ctx)
                 for _filter in filters:
                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                 return selector_function(ctx_copy)

From f8253a528935f78e1a3b724db8c1f0089f99314a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 19:42:36 +0100
Subject: [PATCH 059/156] [core] Avoid deepcopy of ctx dict (fix f35b757) (Pt
 2)

---
 youtube_dl/YoutubeDL.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2c0d4926c..927b19417 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1389,11 +1389,10 @@ class YoutubeDL(object):
                         'abr': formats_info[1].get('abr'),
                         'ext': output_ext,
                     }
-                video_selector, audio_selector = map(_build_selector_function, selector.selector)
 
                 def selector_function(ctx):
-                    for pair in itertools.product(
-                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+                    selector_fn = lambda x: _build_selector_function(x)(ctx)
+                    for pair in itertools.product(*map(selector_fn, selector.selector)):
                         yield _merge(pair)
 
             filters = [self._build_format_filter(f) for f in selector.filters]

From 213d1d91bfc4a00fefc72fa2730555d51060b42d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 19:49:46 +0100
Subject: [PATCH 060/156] [core] No longer importing copy

---
 youtube_dl/YoutubeDL.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 927b19417..2a1e59bf8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,7 +5,6 @@ from __future__ import absolute_import, unicode_literals
 
 import collections
 import contextlib
-import copy
 import datetime
 import errno
 import fileinput

From fe7e13066c20b10fe48bc154431440da36baec53 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 10 Apr 2023 17:12:31 +0100
Subject: [PATCH 061/156] [core] Add and use sanitize_info() method from yt-dlp

---
 youtube_dl/YoutubeDL.py | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2a1e59bf8..2719d546f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -30,9 +30,12 @@ from string import ascii_letters
 from .compat import (
     compat_basestring,
     compat_cookiejar,
+    compat_filter as filter,
     compat_get_terminal_size,
     compat_http_client,
+    compat_integer_types,
     compat_kwargs,
+    compat_map as map,
     compat_numeric_types,
     compat_os_name,
     compat_str,
@@ -64,6 +67,7 @@ from .utils import (
     int_or_none,
     ISO3166Utils,
     locked_file,
+    LazyList,
     make_HTTPS_handler,
     MaxDownloadsReached,
     orderedSet,
@@ -2109,10 +2113,36 @@ class YoutubeDL(object):
         return self._download_retcode
 
     @staticmethod
-    def filter_requested_info(info_dict):
-        return dict(
-            (k, v) for k, v in info_dict.items()
-            if k not in ['requested_formats', 'requested_subtitles'])
+    def sanitize_info(info_dict, remove_private_keys=False):
+        ''' Sanitize the infodict for converting to json '''
+        if info_dict is None:
+            return info_dict
+
+        if remove_private_keys:
+            reject = lambda k, v: (v is None
+                                   or k.startswith('__')
+                                   or k in ('requested_formats',
+                                            'requested_subtitles'))
+        else:
+            reject = lambda k, v: False
+
+        def filter_fn(obj):
+            if isinstance(obj, dict):
+                return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
+            elif isinstance(obj, (list, tuple, set, LazyList)):
+                return list(map(filter_fn, obj))
+            elif obj is None or any(isinstance(obj, c)
+                                    for c in (compat_integer_types,
+                                              (compat_str, float, bool))):
+                return obj
+            else:
+                return repr(obj)
+
+        return filter_fn(info_dict)
+
+    @classmethod
+    def filter_requested_info(cls, info_dict):
+        return cls.sanitize_info(info_dict, True)
 
     def post_process(self, filename, ie_info):
         """Run all the postprocessors on the given file."""

From 735e87adfc44b284dcdb4d9a0155ce0616e3af97 Mon Sep 17 00:00:00 2001
From: Gabriel Nagy <gabrielnagy@me.com>
Date: Thu, 13 Apr 2023 01:40:38 +0300
Subject: [PATCH 062/156] [core] Sanitize info dict before dumping JSON (fixes
 fe7e130)  (#32032)

* follow up to fe7e130 which didn't fix everything.

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/YoutubeDL.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2719d546f..117f1c513 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1777,7 +1777,7 @@ class YoutubeDL(object):
             self.to_stdout(formatSeconds(info_dict['duration']))
         print_mandatory('format')
         if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
 
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -2091,7 +2091,7 @@ class YoutubeDL(object):
                 raise
             else:
                 if self.params.get('dump_single_json', False):
-                    self.to_stdout(json.dumps(res))
+                    self.to_stdout(json.dumps(self.sanitize_info(res)))
 
         return self._download_retcode
 
@@ -2100,6 +2100,7 @@ class YoutubeDL(object):
                 [info_filename], mode='r',
                 openhook=fileinput.hook_encoded('utf-8'))) as f:
             # FileInput doesn't have a read method, we can't call json.load
+            # TODO: let's use io.open(), then
             info = self.filter_requested_info(json.loads('\n'.join(f)))
         try:
             self.process_ie_result(info, download=True)

From 2da3fa04a68ff0652f49d6874d82b7a0edb85ea3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Apr 2023 17:36:27 +0100
Subject: [PATCH 063/156] [YouTube] Simplify signature patterns

---
 youtube_dl/extractor/youtube.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ae3416b20..80fff7ada 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -19,6 +19,7 @@ from ..compat import (
     compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlparse,
+    compat_zip as zip,
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
@@ -1555,17 +1556,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
-             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 

From 26035bde46c0acc30dc053618451d9aeca4b7709 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Apr 2023 00:15:07 +0100
Subject: [PATCH 064/156] [DashSegmentsFD] Correctly detect errors when
 `fragment_retries` == 0

* use the success flag instead of the retry count
* establish the fragment_url outside the retry loop
* only report skipping a fragment once.
* resolves #32033
---
 youtube_dl/downloader/dash.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 67a8e173f..2800d4260 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -38,12 +38,13 @@ class DashSegmentsFD(FragmentFD):
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = frag_index == 1 or not skip_unavailable_fragments
+            fragment_url = fragment.get('url')
+            if not fragment_url:
+                assert fragment_base_url
+                fragment_url = urljoin(fragment_base_url, fragment['path'])
+            success = False
             for count in itertools.count():
                 try:
-                    fragment_url = fragment.get('url')
-                    if not fragment_url:
-                        assert fragment_base_url
-                        fragment_url = urljoin(fragment_base_url, fragment['path'])
                     success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
                         return False
@@ -63,14 +64,13 @@ class DashSegmentsFD(FragmentFD):
                     # itself since it has its own retry settings
                     if fatal:
                         raise
-                    self.report_skip_fragment(frag_index)
                 break
 
-            if count >= fragment_retries:
+            if not success:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
                     continue
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
+                self.report_error('giving up after %s fragment retries' % count)
                 return False
 
         self._finish_frag_download(ctx)

From 211cbfd5d46025a8e4d8f9f3d424aaada4698974 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 21 Apr 2023 14:04:30 +0100
Subject: [PATCH 065/156] [jsinterp] Minimally handle arithmetic operator
 precedence

Resolves #32066
---
 test/test_jsinterp.py  | 11 +++++++++++
 youtube_dl/jsinterp.py | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 5d129433d..e121358d7 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -505,6 +505,17 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    def test_32066(self):
+        jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
+        self.assertEqual(jsi.call_function('x'), 70)
+
+    def test_unary_operators(self):
+        jsi = JSInterpreter('function f(){return 2  -  - - 2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+        # fails
+        # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
+        # self.assertEqual(jsi.call_function('f'), 0)
+
     """ # fails so far
     def test_packed(self):
         jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index ab7d6f926..a06fc4ff5 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+from functools import update_wrapper
 import itertools
 import json
 import math
@@ -23,11 +24,23 @@ from .compat import (
 )
 
 
+def wraps_op(op):
+
+    def update_and_rename_wrapper(w):
+        f = update_wrapper(w, op)
+        # fn names are str in both Py 2/3
+        f.__name__ = str('JS_') + f.__name__
+        return f
+
+    return update_and_rename_wrapper
+
+
 def _js_bit_op(op):
 
     def zeroise(x):
         return 0 if x in (None, JS_Undefined) else x
 
+    @wraps_op(op)
     def wrapped(a, b):
         return op(zeroise(a), zeroise(b)) & 0xffffffff
 
@@ -36,6 +49,7 @@ def _js_bit_op(op):
 
 def _js_arith_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return float('nan')
@@ -66,6 +80,7 @@ def _js_exp(a, b):
 
 def _js_eq_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if set((a, b)) <= set((None, JS_Undefined)):
             return op(a, a)
@@ -76,6 +91,7 @@ def _js_eq_op(op):
 
 def _js_comp_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return False
@@ -356,6 +372,7 @@ class JSInterpreter(object):
             return right_val
 
         try:
+            # print('Eval:', opfunc.__name__, left_val, right_val)
             return opfunc(left_val, right_val)
         except Exception as e:
             raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
@@ -395,6 +412,7 @@ class JSInterpreter(object):
             raise self.Exception('Recursion limit reached')
         allow_recursion -= 1
 
+        # print('At: ' + stmt[:60])
         should_return = False
         # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
@@ -702,9 +720,24 @@ class JSInterpreter(object):
                 continue
 
             right_expr = separated.pop()
-            while op == '-' and len(separated) > 1 and not separated[-1].strip():
-                right_expr = '-' + right_expr
-                separated.pop()
+            # handle operators that are both unary and binary, minimal BODMAS
+            if op in ('+', '-'):
+                undone = 0
+                while len(separated) > 1 and not separated[-1].strip():
+                    undone += 1
+                    separated.pop()
+                if op == '-' and undone % 2 != 0:
+                    right_expr = op + right_expr
+                left_val = separated[-1]
+                for dm_op in ('*', '%', '/', '**'):
+                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
+                    if len(bodmas) > 1 and not bodmas[-1].strip():
+                        expr = op.join(separated) + op + right_expr
+                        right_expr = None
+                        break
+                if right_expr is None:
+                    continue
+
             left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
             return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
 
@@ -955,6 +988,7 @@ class JSInterpreter(object):
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]
         argnames = tuple(argnames)
+        # import pdb; pdb.set_trace()
 
         def resf(args, kwargs={}, allow_recursion=100):
             global_stack[0].update(

From 64d6dd64c8b7a35a87655d27fc83f2e98ef6ce13 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 23 Apr 2023 22:58:35 +0100
Subject: [PATCH 066/156] [YouTube] Support Releases tab

---
 youtube_dl/extractor/youtube.py | 114 +++++++++++++++++++-------------
 youtube_dl/utils.py             |   9 ++-
 2 files changed, 74 insertions(+), 49 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 80fff7ada..0411c49f1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     extract_attributes,
     get_element_by_attribute,
     int_or_none,
+    join_nonempty,
     js_to_json,
     LazyList,
     merge_dicts,
@@ -45,6 +46,7 @@ from ..utils import (
     str_to_int,
     traverse_obj,
     try_get,
+    txt_or_none,
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
@@ -2608,6 +2610,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 75,
+    }, {
+        # Releases tab
+        'url': 'https://www.youtube.com/@daftpunk/releases',
+        'info_dict': {
+            'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
+            'title': 'Daft Punk - Releases',
+            'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
+            'uploader_id': '@daftpunk',
+            'uploader': 'Daft Punk',
+        },
+        'playlist_mincount': 36,
     }, {
         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
@@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 continue
             return renderer
 
+    @staticmethod
+    def _get_text(r, k):
+        return traverse_obj(
+            r, (k, 'runs', 0, 'text'), (k, 'simpleText'),
+            expected_type=txt_or_none)
+
     def _grid_entries(self, grid_renderer):
         for item in grid_renderer['items']:
             if not isinstance(item, dict):
@@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             renderer = self._extract_grid_item_renderer(item)
             if not isinstance(renderer, dict):
                 continue
-            title = try_get(
-                renderer, (lambda x: x['title']['runs'][0]['text'],
-                           lambda x: x['title']['simpleText']), compat_str)
+            title = self._get_text(renderer, 'title')
             # playlist
             playlist_id = renderer.get('playlistId')
             if playlist_id:
@@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             # channel
             channel_id = renderer.get('channelId')
             if channel_id:
-                title = try_get(
-                    renderer, lambda x: x['title']['simpleText'], compat_str)
+                title = self._get_text(renderer, 'title')
                 yield self.url_result(
                     'https://www.youtube.com/channel/%s' % channel_id,
                     ie=YoutubeTabIE.ie_key(), video_title=title)
@@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(
-                content,
-                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
-                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
-                dict)
+            content = traverse_obj(
+                content, ('richItemRenderer', 'content'),
+                expected_type=dict) or {}
+            video_renderer = traverse_obj(
+                content, 'videoRenderer', 'reelItemRenderer',
+                expected_type=dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:
                     yield entry
+            # playlist
+            renderer = traverse_obj(
+                content, 'playlistRenderer', expected_type=dict) or {}
+            title = self._get_text(renderer, 'title')
+            playlist_id = renderer.get('playlistId')
+            if playlist_id:
+                yield self.url_result(
+                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
+                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+                    video_title=title)
 
     @staticmethod
     def _build_continuation_query(continuation, ctp=None):
@@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 return
             for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
                 yield entry
+
             continuation = self._extract_continuation(rich_grid_renderer)
 
         ytcfg = self._extract_ytcfg(item_id, webpage)
@@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         uploader['channel'] = uploader['uploader']
         return uploader
 
-    @staticmethod
-    def _extract_alert(data):
+    @classmethod
+    def _extract_alert(cls, data):
         alerts = []
-        for alert in try_get(data, lambda x: x['alerts'], list) or []:
-            if not isinstance(alert, dict):
-                continue
-            alert_text = try_get(
-                alert, lambda x: x['alertRenderer']['text'], dict)
+        for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
+            alert_text = traverse_obj(
+                alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
             if not alert_text:
                 continue
-            text = try_get(
-                alert_text,
-                (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
-                compat_str)
+            text = cls._get_text(alert_text, 'text')
             if text:
                 alerts.append(text)
         return '\n'.join(alerts)
 
     def _extract_from_tabs(self, item_id, webpage, data, tabs):
         selected_tab = self._extract_selected_tab(tabs)
-        renderer = try_get(
-            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
+        renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
+                                expected_type=dict) or {}
         playlist_id = item_id
         title = description = None
         if renderer:
-            channel_title = renderer.get('title') or item_id
-            tab_title = selected_tab.get('title')
-            title = channel_title or item_id
-            if tab_title:
-                title += ' - %s' % tab_title
-            if selected_tab.get('expandedText'):
-                title += ' - %s' % selected_tab['expandedText']
-            description = renderer.get('description')
-            playlist_id = renderer.get('externalId')
+            channel_title = txt_or_none(renderer.get('title')) or item_id
+            tab_title = txt_or_none(selected_tab.get('title'))
+            title = join_nonempty(
+                channel_title or item_id, tab_title,
+                txt_or_none(selected_tab.get('expandedText')),
+                delim=' - ')
+            description = txt_or_none(renderer.get('description'))
+            playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
         else:
-            renderer = try_get(
-                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
-            if renderer:
-                title = renderer.get('title')
-            else:
-                renderer = try_get(
-                    data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
-                if renderer:
-                    title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
+            renderer = traverse_obj(data,
+                                    ('metadata', 'playlistMetadataRenderer'),
+                                    ('header', 'hashtagHeaderRenderer'),
+                                    expected_type=dict) or {}
+            title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
+                                 expected_type=txt_or_none)
         playlist = self.playlist_result(
             self._entries(selected_tab, item_id, webpage),
             playlist_id=playlist_id, playlist_title=title,
@@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         return merge_dicts(playlist, self._extract_uploader(renderer, data))
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
-        title = playlist.get('title') or try_get(
-            data, lambda x: x['titleText']['simpleText'], compat_str)
-        playlist_id = playlist.get('playlistId') or item_id
+        title = traverse_obj((playlist, data),
+                             (0, 'title'), (1, 'titleText', 'simpleText'),
+                             expected_type=txt_or_none)
+        playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
         # Inline playlist rendition continuation does not always work
         # at Youtube side, so delegating regular tab-based playlist URL
         # processing whenever possible.
-        playlist_url = urljoin(url, try_get(
-            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
-            compat_str))
+        playlist_url = urljoin(url, traverse_obj(
+            playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
+            expected_type=url_or_none))
         if playlist_url and playlist_url != url:
             return self.url_result(
                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d80ceb007..65ddb3b0f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3753,6 +3753,11 @@ def strip_or_none(v, default=None):
     return v.strip() if isinstance(v, compat_str) else default
 
 
+def txt_or_none(v, default=None):
+    """ Combine str/strip_or_none, disallow blank value (for traverse_obj) """
+    return default if v is None else (compat_str(v).strip() or default)
+
+
 def url_or_none(url):
     if not url or not isinstance(url, compat_str):
         return None
@@ -4096,8 +4101,8 @@ def escape_url(url):
     ).geturl()
 
 
-def parse_qs(url):
-    return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
+def parse_qs(url, **kwargs):
+    return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs)
 
 
 def read_batch_urls(batch_fd):

From 11cc3f3ad03a88d6cb1eab18a8e5dd6bf148ac54 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 20:53:07 +0100
Subject: [PATCH 067/156] [utils] Fix `compiled_regex_type` in 249f2b6

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 65ddb3b0f..584581b6a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -56,6 +56,7 @@ from .compat import (
     compat_kwargs,
     compat_os_name,
     compat_re_Match,
+    compat_re_Pattern,
     compat_shlex_quote,
     compat_str,
     compat_struct_pack,
@@ -86,7 +87,7 @@ def register_socks_protocols():
 
 
 # Unfavoured alias
-compiled_regex_type = compat_re_Match
+compiled_regex_type = compat_re_Pattern
 
 
 def random_user_agent():

From a85a875fef2e9b097c3f6f93f1d0cead06f84e43 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 20:59:30 +0100
Subject: [PATCH 068/156] [jsinterp] Handle NaN in bitwise operators * also add
 _NaN * also pull function naming from yt-dlp

---
 test/test_jsinterp.py  | 11 +++++++++++
 youtube_dl/jsinterp.py | 41 ++++++++++++++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index e121358d7..a8f312fde 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -18,6 +18,7 @@ class TestJSInterpreter(unittest.TestCase):
     def test_basic(self):
         jsi = JSInterpreter('function x(){;}')
         self.assertEqual(jsi.call_function('x'), None)
+        self.assertEqual(repr(jsi.extract_function('x')), 'F<x>')
 
         jsi = JSInterpreter('function x3(){return 42;}')
         self.assertEqual(jsi.call_function('x3'), 42)
@@ -505,6 +506,16 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    def test_bitwise_operators_madness(self):
+        jsi = JSInterpreter('function x(){return null << 5}')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+        jsi = JSInterpreter('function x(){return undefined >> 5}')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+        jsi = JSInterpreter('function x(){return 42 << NaN}')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_32066(self):
         jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
         self.assertEqual(jsi.call_function('x'), 70)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a06fc4ff5..bb406647a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
-from functools import update_wrapper
 import itertools
 import json
 import math
 import operator
 import re
 
+from functools import update_wrapper
+
 from .utils import (
     error_to_compat_str,
     ExtractorError,
@@ -24,6 +25,22 @@ from .compat import (
 )
 
 
+# name JS functions
+class function_with_repr(object):
+    # from yt_dlp/utils.py, but in this module
+    # repr_ is always set
+    def __init__(self, func, repr_):
+        update_wrapper(self, func)
+        self.func, self.__repr = func, repr_
+
+    def __call__(self, *args, **kwargs):
+        return self.func(*args, **kwargs)
+
+    def __repr__(self):
+        return self.__repr
+
+
+# name JS operators
 def wraps_op(op):
 
     def update_and_rename_wrapper(w):
@@ -35,10 +52,13 @@ def wraps_op(op):
     return update_and_rename_wrapper
 
 
+_NaN = float('nan')
+
+
 def _js_bit_op(op):
 
     def zeroise(x):
-        return 0 if x in (None, JS_Undefined) else x
+        return 0 if x in (None, JS_Undefined, _NaN) else x
 
     @wraps_op(op)
     def wrapped(a, b):
@@ -52,7 +72,7 @@ def _js_arith_op(op):
     @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
-            return float('nan')
+            return _NaN
         return op(a or 0, b or 0)
 
     return wrapped
@@ -60,13 +80,13 @@ def _js_arith_op(op):
 
 def _js_div(a, b):
     if JS_Undefined in (a, b) or not (a and b):
-        return float('nan')
+        return _NaN
     return operator.truediv(a or 0, b) if b else float('inf')
 
 
 def _js_mod(a, b):
     if JS_Undefined in (a, b) or not b:
-        return float('nan')
+        return _NaN
     return (a or 0) % b
 
 
@@ -74,7 +94,7 @@ def _js_exp(a, b):
     if not b:
         return 1  # even 0 ** 0 !!
     elif JS_Undefined in (a, b):
-        return float('nan')
+        return _NaN
     return (a or 0) ** b
 
 
@@ -285,6 +305,8 @@ class JSInterpreter(object):
     def _named_object(self, namespace, obj):
         self.__named_object_counter += 1
         name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
+        if callable(obj) and not isinstance(obj, function_with_repr):
+            obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, ))
         namespace[name] = obj
         return name
 
@@ -693,7 +715,7 @@ class JSInterpreter(object):
         elif expr == 'undefined':
             return JS_Undefined, should_return
         elif expr == 'NaN':
-            return float('NaN'), should_return
+            return _NaN, should_return
 
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -953,7 +975,9 @@ class JSInterpreter(object):
         return self.build_arglist(func_m.group('args')), code
 
     def extract_function(self, funcname):
-        return self.extract_function_from_code(*self.extract_function_code(funcname))
+        return function_with_repr(
+            self.extract_function_from_code(*self.extract_function_code(funcname)),
+            'F<%s>' % (funcname, ))
 
     def extract_function_from_code(self, argnames, code, *global_stack):
         local_vars = {}
@@ -988,7 +1012,6 @@ class JSInterpreter(object):
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]
         argnames = tuple(argnames)
-        # import pdb; pdb.set_trace()
 
         def resf(args, kwargs={}, allow_recursion=100):
             global_stack[0].update(

From 6ed34338285f722d0da312ce0af3a15a077a3e2a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 21:02:01 +0100
Subject: [PATCH 069/156] [jsinterp] Add short-cut evaluation for common
 expression * special handling for (d%e.length+e.length)%e.length speeds up
 ~6%

---
 youtube_dl/jsinterp.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index bb406647a..f837865c4 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -502,8 +502,15 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('('):
-            inner, outer = self._separate_at_paren(expr)
-            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+
+            m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
+            if m:
+                # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
+                outer = None
+                inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars)
+            else:
+                inner, outer = self._separate_at_paren(expr)
+                inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
             else:
@@ -957,6 +964,17 @@ class JSInterpreter(object):
 
         return obj
 
+    @staticmethod
+    def _offset_e_by_d(d, e, local_vars):
+        """ Short-cut eval: (d%e.length+e.length)%e.length """
+        try:
+            d = local_vars[d]
+            e = local_vars[e]
+            e = len(e)
+            return _js_mod(_js_mod(d, e) + e, e), False
+        except Exception:
+            return None, True
+
     def extract_function_code(self, funcname):
         """ @returns argnames, code """
         func_m = re.search(

From d1c6c5c4d618fa950813c0c71aede34a5ac851e9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 21:17:31 +0100
Subject: [PATCH 070/156] [core] Improve platform debug log, based on yt-dlp

---
 youtube_dl/YoutubeDL.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 117f1c513..212c04298 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,6 +25,7 @@ import tokenize
 import traceback
 import random
 
+from ssl import OPENSSL_VERSION
 from string import ascii_letters
 
 from .compat import (
@@ -66,6 +67,7 @@ from .utils import (
     HEADRequest,
     int_or_none,
     ISO3166Utils,
+    join_nonempty,
     locked_file,
     LazyList,
     make_HTTPS_handler,
@@ -2395,9 +2397,20 @@ class YoutubeDL(object):
                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
             return impl_name
 
-        self._write_string('[debug] Python version %s (%s) - %s\n' % (
-            platform.python_version(), python_implementation(),
-            platform_name()))
+        def libc_ver():
+            try:
+                return platform.libc_ver()
+            except OSError:  # We may not have access to the executable
+                return []
+
+        self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % (
+            platform.python_version(),
+            python_implementation(),
+            platform.architecture()[0],
+            platform_name(),
+            OPENSSL_VERSION,
+            ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'),
+        ))
 
         exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()

From d89c2137ba4c1def185358a9ff48642e05ac65a2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 May 2023 13:09:18 +0100
Subject: [PATCH 071/156] [jsinterp] Small updates for a85a875 * update
 signature tests * clarify NaN handling

---
 test/test_jsinterp.py          |  3 +++
 test/test_youtube_signature.py |  8 ++++++++
 youtube_dl/jsinterp.py         | 12 +++++-------
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index a8f312fde..1cc148b15 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -516,6 +516,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 42 << NaN}')
         self.assertEqual(jsi.call_function('x'), 42)
 
+        jsi = JSInterpreter('function x(){return 42 << Infinity}')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_32066(self):
         jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
         self.assertEqual(jsi.call_function('x'), 70)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index decf7ee38..d41d708a0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -143,6 +143,14 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
         'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
     ),
+    (
+        'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
+        'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
+    ),
+    (
+        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
+        'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index f837865c4..dc580943e 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
 
 import itertools
 import json
-import math
 import operator
 import re
 
@@ -52,6 +51,10 @@ def wraps_op(op):
     return update_and_rename_wrapper
 
 
+# NB In principle NaN cannot be checked by membership.
+# Here all NaN values are actually this one, so _NaN is _NaN,
+# although _NaN != _NaN.
+
 _NaN = float('nan')
 
 
@@ -126,13 +129,8 @@ def _js_comp_op(op):
 
 def _js_ternary(cndn, if_true=True, if_false=False):
     """Simulate JS's ternary operator (cndn?if_true:if_false)"""
-    if cndn in (False, None, 0, '', JS_Undefined):
+    if cndn in (False, None, 0, '', JS_Undefined, _NaN):
         return if_false
-    try:
-        if math.isnan(cndn):  # NB: NaN cannot be checked by membership
-            return if_false
-    except TypeError:
-        pass
     return if_true
 
 

From 1f7c6f8b2ba5bedc9b4da279659688fbbf06a059 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 May 2023 13:12:59 +0100
Subject: [PATCH 072/156] [core] Further improve platform debug log * see
 d1c6c5c

---
 youtube_dl/YoutubeDL.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 212c04298..1b3ef94b4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -102,6 +102,7 @@ from .utils import (
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
+    ytdl_is_updateable,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -2373,9 +2374,11 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
+
+        writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else ''))
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+            writeln_debug('Lazy loading extractors enabled')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -2384,7 +2387,7 @@ class YoutubeDL(object):
             out, err = process_communicate_or_kill(sp)
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
-                self._write_string('[debug] Git HEAD: ' + out + '\n')
+                writeln_debug('Git HEAD: ', out)
         except Exception:
             try:
                 sys.exc_clear()
@@ -2403,13 +2406,15 @@ class YoutubeDL(object):
             except OSError:  # We may not have access to the executable
                 return []
 
-        self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % (
+        libc = join_nonempty(*libc_ver(), delim=' ')
+        writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
             platform.python_version(),
             python_implementation(),
+            platform.machine(),
             platform.architecture()[0],
             platform_name(),
             OPENSSL_VERSION,
-            ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'),
+            (' - %s' % (libc, )) if libc else ''
         ))
 
         exe_versions = FFmpegPostProcessor.get_versions(self)
@@ -2422,17 +2427,17 @@ class YoutubeDL(object):
         )
         if not exe_str:
             exe_str = 'none'
-        self._write_string('[debug] exe versions: %s\n' % exe_str)
+        writeln_debug('exe versions: %s' % (exe_str, ))
 
         proxy_map = {}
         for handler in self._opener.handlers:
             if hasattr(handler, 'proxies'):
                 proxy_map.update(handler.proxies)
-        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        writeln_debug('Proxy map: ', compat_str(proxy_map))
 
         if self.params.get('call_home', False):
             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
-            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+            writeln_debug('Public IP address: %s' % (ipaddr, ))
             latest_version = self.urlopen(
                 'https://yt-dl.org/latest/version').read().decode('utf-8')
             if version_tuple(latest_version) > version_tuple(__version__):

From ee731f3d00064f446faa9ffb4c21ce4ca388bf5d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 23 May 2023 16:19:55 +0100
Subject: [PATCH 073/156] [ITV] Fix UA capitalisation in 384f632

---
 youtube_dl/extractor/itv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index 7026139ea..c64af3be6 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -59,7 +59,7 @@ class ITVBaseIE(InfoExtractor):
 
     @staticmethod
     def _vanilla_ua_header():
-        return {'User-agent': 'Mozilla/5.0'}
+        return {'User-Agent': 'Mozilla/5.0'}
 
     def _download_webpage_handle(self, url, video_id, *args, **kwargs):
         # specialised to (a) use vanilla UA (b) detect geo-block
@@ -69,7 +69,7 @@ class ITVBaseIE(InfoExtractor):
                 'user_agent' not in params
                 and not any(re.match(r'(?i)user-agent\s*:', h)
                             for h in (params.get('headers') or []))
-                and 'User-agent' not in (kwargs.get('headers') or {})):
+                and 'User-Agent' not in (kwargs.get('headers') or {})):
 
             kwargs.setdefault('headers', {})
             kwargs['headers'] = self._vanilla_ua_header()

From 2389c7cbd30813435c50848a9b276bcfe2a810db Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 23 May 2023 17:11:22 +0100
Subject: [PATCH 074/156] [compat] Fix casefold import __all__ syntax in
 a19855f

---
 youtube_dl/casefold.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
index 748c2d491..ad9c66f8e 100644
--- a/youtube_dl/casefold.py
+++ b/youtube_dl/casefold.py
@@ -1663,5 +1663,5 @@ def casefold(s):
 
 
 __all__ = [
-    casefold
+    'casefold',
 ]

From b8a86dcf1aa837577178ae25357d8241ab4ba6c1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 26 May 2023 20:25:25 +0100
Subject: [PATCH 075/156] [core] Revise 1f7c6f8 to help downstream merger
 (possibly)

---
 youtube_dl/YoutubeDL.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1b3ef94b4..98b878fc1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2374,11 +2374,10 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
-
-        writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else ''))
+        self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n'))
         if _LAZY_LOADER:
-            writeln_debug('Lazy loading extractors enabled')
+            self._write_string('[debug] Lazy loading extractors enabled\n')
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))  # moved down for easier merge
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],

From a2534f7b888416e872d5afd1862eb3e30fc69fc7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 11 Jun 2023 13:33:50 +0100
Subject: [PATCH 076/156] [jsinterp] Fix div bug breaking player 8c7583ff

Thx bashonly: https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1585639223
Fixes #32292
---
 test/test_jsinterp.py          | 49 ++++++++++++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 +++
 youtube_dl/jsinterp.py         |  2 +-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 1cc148b15..ecd6ab3c9 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -33,6 +33,55 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x4(a){return 2*a+1;}')
         self.assertEqual(jsi.call_function('x4', 3), 7)
 
+    def test_add(self):
+        jsi = JSInterpreter('function f(){return 42 + 7;}')
+        self.assertEqual(jsi.call_function('f'), 49)
+        jsi = JSInterpreter('function f(){return 42 + undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 + null;}')
+        self.assertEqual(jsi.call_function('f'), 42)
+
+    def test_sub(self):
+        jsi = JSInterpreter('function f(){return 42 - 7;}')
+        self.assertEqual(jsi.call_function('f'), 35)
+        jsi = JSInterpreter('function f(){return 42 - undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 - null;}')
+        self.assertEqual(jsi.call_function('f'), 42)
+
+    def test_mul(self):
+        jsi = JSInterpreter('function f(){return 42 * 7;}')
+        self.assertEqual(jsi.call_function('f'), 294)
+        jsi = JSInterpreter('function f(){return 42 * undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 * null;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
+    def test_div(self):
+        jsi = JSInterpreter('function f(a, b){return a / b;}')
+        self.assertTrue(math.isnan(jsi.call_function('f', 0, 0)))
+        self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1)))
+        self.assertTrue(math.isinf(jsi.call_function('f', 2, 0)))
+        self.assertEqual(jsi.call_function('f', 0, 3), 0)
+
+    def test_mod(self):
+        jsi = JSInterpreter('function f(){return 42 % 7;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+        jsi = JSInterpreter('function f(){return 42 % 0;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 % undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+
+    def test_exp(self):
+        jsi = JSInterpreter('function f(){return 42 ** 2;}')
+        self.assertEqual(jsi.call_function('f'), 1764)
+        jsi = JSInterpreter('function f(){return 42 ** undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 ** null;}')
+        self.assertEqual(jsi.call_function('f'), 1)
+        jsi = JSInterpreter('function f(){return undefined ** 42;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+
     def test_empty_return(self):
         jsi = JSInterpreter('function f(){return; y()}')
         self.assertEqual(jsi.call_function('f'), None)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index d41d708a0..e7bce9d68 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -151,6 +151,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
         'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
     ),
+    (
+        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
+        'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index dc580943e..9d4a5bc57 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -82,7 +82,7 @@ def _js_arith_op(op):
 
 
 def _js_div(a, b):
-    if JS_Undefined in (a, b) or not (a and b):
+    if JS_Undefined in (a, b) or not (a or b):
         return _NaN
     return operator.truediv(a or 0, b) if b else float('inf')
 

From ff75c300f52321dc7322e28d1df153cf0ea65a6d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:34:11 +0100
Subject: [PATCH 077/156] [jsinterp] Fix test for failed match in
 extract_function_code()

---
 youtube_dl/jsinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 9d4a5bc57..c18c4fef1 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -985,9 +985,9 @@ class JSInterpreter(object):
                 \((?P<args>[^)]*)\)\s*
                 (?P<code>{.+})''' % {'name': re.escape(funcname)},
             self.code)
-        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         if func_m is None:
             raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
+        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         return self.build_arglist(func_m.group('args')), code
 
     def extract_function(self, funcname):

From d6433cbb2c4440056a38846e35bb5a3efa9bcac2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:43:10 +0100
Subject: [PATCH 078/156] [jsinterp] Don't find unrelated objects

---
 youtube_dl/jsinterp.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c18c4fef1..00f219440 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -941,15 +941,15 @@ class JSInterpreter(object):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
         obj_m = re.search(
-            r'''(?x)
-                (?<!this\.)%s\s*=\s*{\s*
-                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
-                }\s*;
-            ''' % (re.escape(objname), _FUNC_NAME_RE),
+            r'''(?xs)
+                (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
+                    (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
+                }}\s*);
+            '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
             self.code)
-        if not obj_m:
+        fields = obj_m and obj_m.group('fields')
+        if fields is None:
             raise self.Exception('Could not find object ' + objname)
-        fields = obj_m.group('fields')
         # Currently, it only supports function definitions
         fields_m = re.finditer(
             r'''(?x)

From ae8ba2c31977b68b75221f80c488c0b12385269c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:36:39 +0100
Subject: [PATCH 079/156] [YouTube] Fix `KeyError QV` in signature extraction
 failed * temporarily force missing global definition into sig JS * improve
 test: thanks
 https://github.com/yt-dlp/yt-dlp/issues/7327#issuecomment-1595274615 *
 resolves #32314

---
 test/test_youtube_signature.py  | 7 ++++++-
 youtube_dl/extractor/youtube.py | 6 +++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index e7bce9d68..4ba586e53 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -63,6 +63,11 @@ _SIG_TESTS = [
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
+    ),
+    (
+        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
     )
 ]
 
@@ -231,7 +236,7 @@ def n_sig(jscode, sig_input):
 
 
 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+    'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
 for test_spec in _SIG_TESTS:
     make_sig_test(*test_spec)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0411c49f1..0bbce71a3 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1569,8 +1569,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
-        jsi = JSInterpreter(jscode)
+        # temporary (please) hack for player 6ed0d907 #32314
+        ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
+        jsi = JSInterpreter(ah + jscode)
+
         initial_function = jsi.extract_function(funcname)
+
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url):

From 07af47960f3bb262ead02490ce65c8c45c01741e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 18 Jun 2023 00:52:18 +0100
Subject: [PATCH 080/156] [YouTube] Improve fix for ae8ba2c Thx:
 https://github.com/yt-dlp/yt-dlp/commit/01aba25

---
 youtube_dl/extractor/youtube.py |  4 +---
 youtube_dl/jsinterp.py          | 21 ++++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0bbce71a3..1855fca7f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1569,9 +1569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
-        # temporary (please) hack for player 6ed0d907 #32314
-        ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
-        jsi = JSInterpreter(ah + jscode)
+        jsi = JSInterpreter(jscode)
 
         initial_function = jsi.extract_function(funcname)
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 00f219440..1ba9c3d67 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -940,15 +940,18 @@ class JSInterpreter(object):
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
-        obj_m = re.search(
-            r'''(?xs)
-                (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
-                    (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
-                }}\s*);
-            '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
-            self.code)
-        fields = obj_m and obj_m.group('fields')
-        if fields is None:
+        fields = None
+        for obj_m in re.finditer(
+                r'''(?xs)
+                    {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
+                        (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
+                    }}\s*;
+                '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
+                self.code):
+            fields = obj_m.group('fields')
+            if fields:
+                break
+        else:
             raise self.Exception('Could not find object ' + objname)
         # Currently, it only supports function definitions
         fields_m = re.finditer(

From 9112e668a5ea6376017718db9ff13b369d53ad7a Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 22 Jun 2023 13:23:31 +0530
Subject: [PATCH 081/156] [YouTube] Improve nsig function name extraction

Fixes player b7910ca8, using `,` vs `;`
See https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1602231170

Co-authored-by: dirkf
---
 test/test_youtube_signature.py  | 11 +++--------
 youtube_dl/extractor/youtube.py | 19 +++++++++++++------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4ba586e53..5dcabaf95 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -63,11 +63,6 @@ _SIG_TESTS = [
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
-    ),
-    (
-        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
-        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
-        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
     )
 ]
 
@@ -157,8 +152,8 @@ _NSIG_TESTS = [
         'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
     ),
     (
-        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
-        'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A',
+        'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
+        '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
     ),
 ]
 
@@ -236,7 +231,7 @@ def n_sig(jscode, sig_input):
 
 
 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
+    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
 for test_spec in _SIG_TESTS:
     make_sig_test(*test_spec)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1855fca7f..24e2efbd9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1623,15 +1623,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
         if not idx:
             return nfunc
+
+        VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
+        note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
+
+        def search_function_code(needle, group):
+            return self._search_regex(
+                VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
+                note.format(group), group=group)
+
         if int_or_none(idx) == 0:
-            real_nfunc = self._search_regex(
-                r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
-                'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals()))
+            real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
             if real_nfunc:
                 return real_nfunc
-        return self._parse_json(self._search_regex(
-            r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
-            'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
+        return self._parse_json(
+            search_function_code('.+?', group='name'),
+            nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)

From ebdc82c58684b4e202fabc046f9a40fc73cccde5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 22 Jun 2023 17:24:48 +0100
Subject: [PATCH 082/156] [workflows/ci.yml] Replace actions/setup-python for
 legacy Pythons

Thanks MatteoH2O1999: https://github.com/MatteoH2O1999/setup-python
---
 .github/workflows/ci.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 51abdce1d..9be4eaa89 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,10 +38,12 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - name: Set up supported Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }}
+      # wrap broken actions/setup-python@v4
+      uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
+        cache-build: true
+        allow-build: info
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
       uses: actions/setup-java@v2

From fa7f0effbe4e14fcf70e1dc4496371c9862b64b9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 22 Jun 2023 23:10:04 +0100
Subject: [PATCH 083/156] [YouTube] Avoid crash in author extraction

---
 youtube_dl/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 24e2efbd9..9c419c002 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -448,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             extract_attributes(self._search_regex(
                 r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
                 % re.escape(var_name),
-                get_element_by_attribute('itemprop', 'author', webpage) or '',
+                get_element_by_attribute('itemprop', 'author', webpage or '') or '',
                 'author link', default='')),
             paths[var_name][0])
 

From 58fc5bde47215d9e7c60647dd21202a254b3b066 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 23 Jun 2023 00:15:06 +0100
Subject: [PATCH 084/156] [workflows/ci.yml] Restore test support for Py 3.3,
 3.4, and add 2.6

---
 .github/workflows/ci.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9be4eaa89..4008cc190 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,9 +8,7 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        # TODO: python 2.6
-        # TODO: restore support for 3.3, 3.4
-        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-impl: [cpython]
         ytdl-test-set: [core, download]
         run-tests-ext: [sh]

From 2500300c2a5986ace34390aa473a8bd51f83622c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 29 Jun 2023 15:27:12 +0100
Subject: [PATCH 085/156] [workflows/ci.yml] Restore test support for Py 3.2

---
 .github/workflows/ci.yml           | 319 +++++++++++++++++++++++++++--
 devscripts/make_lazy_extractors.py |   4 +
 test/test_execution.py             |   8 +-
 test/test_unicode_literals.py      |   1 +
 youtube_dl/__init__.py             |   8 +-
 youtube_dl/compat.py               |  18 +-
 6 files changed, 328 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4008cc190..8d8e654fb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,73 +1,349 @@
 name: CI
-on: [push, pull_request]
+
+env:
+  # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099)
+  # or switching to fork of https://github.com/mdmintz/pynose
+  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
+  main-cpython-versions: 2.7, 3.2, 3.5, 3.9
+  pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
+  cpython-versions: all
+  # test-set: both
+  test-set: core
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      cpython-versions:
+        type: choice
+        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9)
+        options:
+          - all
+          - main
+        required: true
+        default: main
+      test-set:
+        type: choice
+        description: core, download
+        options:
+          - both
+          - core
+          - download
+        required: true
+        default: core
+
+permissions:
+  contents: read
+
 jobs:
+  select:
+    name: Select tests from inputs
+    runs-on: ubuntu-latest
+    outputs:
+      cpython-versions: ${{ steps.run.outputs.cpython-versions }}
+      test-set: ${{ steps.run.outputs.test-set }}
+      own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
+    steps:
+    - id: run
+      run: |
+        # Make a JSON Array from comma/space-separated string (no extra escaping)
+        json_list() { \
+          ret=""; IFS="${IFS},"; set -- $*; \
+          for a in "$@"; do \
+            ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \
+          done; \
+          printf '[%s]' "$ret"; }
+        tests="${{ inputs.test-set || env.test-set }}"
+        [ $tests = both ] && tests="core download"
+        printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT"
+        versions="${{ inputs.cpython-versions || env.cpython-versions }}"
+        if [ "$versions" = all ]; then \
+          versions="${{ env.all-cpython-versions }}"; else \
+          versions="${{ env.main-cpython-versions }}"; \
+        fi
+        printf 'cpython-versions=%s\n' \
+          "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT"
+        # versions with a special get-pip.py in a per-version subdirectory
+        printf 'own-pip-versions=%s\n' \
+          "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
+
   tests:
-    name: Tests
+    name: Run tests
+    needs: select
+    permissions:
+      contents: read
+      packages: write
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        # outside steps, use github.env...., not env....
+        python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
         python-impl: [cpython]
-        ytdl-test-set: [core, download]
+        ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
         run-tests-ext: [sh]
         include:
-        # python 3.2 is only available on windows via setup-python
         - os: windows-2019
           python-version: 3.2
           python-impl: cpython
-          ytdl-test-set: core
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: bat
         - os: windows-2019
           python-version: 3.2
           python-impl: cpython
-          ytdl-test-set: download
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: bat
         # jython
         - os: ubuntu-20.04
           python-impl: jython
-          ytdl-test-set: core
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: sh
         - os: ubuntu-20.04
           python-impl: jython
-          ytdl-test-set: download
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
     steps:
-    - uses: actions/checkout@v3
+    - name: Checkout
+      uses: actions/checkout@v3
+    #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
+      id: setup-python
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}}
       # wrap broken actions/setup-python@v4
       uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
         cache-build: true
         allow-build: info
+    - name: Locate supported Python ${{ matrix.python-version }}
+      if: ${{ env.pythonLocation }}
+      shell: bash
+      run: |
+        echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV"
+        export expected="${{ steps.setup-python.outputs.python-path }}"
+        dirname() { printf '%s\n' \
+            'import os, sys' \
+            'print(os.path.dirname(sys.argv[1]))' \
+            | ${expected} - "$1"; }
+        expd="$(dirname "$expected")"
+        export python="$(command -v python)"
+        [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV"
+        [ -x "$python" ] || printf '%s\n' \
+            'import os' \
+            'exp = os.environ["expected"]' \
+            'python = os.environ["python"]' \
+            'exps = os.path.split(exp)' \
+            'if python and (os.path.dirname(python) == exp[0]):' \
+            '    exit(0)' \
+            'exps[1] = "python" + os.path.splitext(exps[1])[1]' \
+            'python = os.path.join(*exps)' \
+            'try:' \
+            '    os.symlink(exp, python)' \
+            'except AttributeError:' \
+            '    os.rename(exp, python)' \
+            | ${expected} -
+        printf '%s\n' \
+            'import sys' \
+            'print(sys.path)' \
+            | ${expected} -
+    #-------- Python 2.7 --
+    - name: Set up Python 2.7
+      if: ${{ matrix.python-version == '2.7' }}
+      # install 2.7
+      run: |
+        sudo apt-get install -y python2 python-is-python2
+        echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
+    #-------- Python 2.6 --
+    - name: Set up Python 2.6 environment
+      if: ${{ matrix.python-version == '2.6' }}
+      run: |
+        openssl_name=openssl-1.0.2u
+        echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
+        openssl_dir=$HOME/.local/opt/$openssl_name
+        echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV"
+        PYENV_ROOT=$HOME/.local/share/pyenv
+        echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        sudo apt-get install -y openssl ca-certificates
+    - name: Cache Python 2.6
+      id: cache26
+      if: ${{ matrix.python-version == '2.6' }}
+      uses: actions/cache@v3
+      with:
+        key: python-2.6.9
+        path: |
+          ${{ env.openssl_dir }}
+          ${{ env.PYENV_ROOT }}
+    - name: Build and set up Python 2.6
+      if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
+      # dl and build locally
+      run: |
+        # Install build environment
+        sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
+                      libncursesw5-dev libreadline-dev libsqlite3-dev   \
+                      libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+        # Download and install OpenSSL 1.0.2, back in time
+        openssl_name=${{ env.openssl_name }}
+        openssl_targz=${openssl_name}.tar.gz
+        openssl_dir=${{ env.openssl_dir }}
+        openssl_inc=$openssl_dir/include
+        openssl_lib=$openssl_dir/lib
+        openssl_ssl=$openssl_dir/ssl
+        curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz
+        tar -xf $openssl_targz
+        ( cd $openssl_name; \
+          ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \
+            --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \
+          make && \
+          make install )
+        rm -rf $openssl_name
+        rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
+
+        # Download PyEnv from its GitHub repository.
+        export PYENV_ROOT=${{ env.PYENV_ROOT }}
+        export PATH=$PYENV_ROOT/bin:$PATH
+        git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT
+        eval "$(pyenv init --path)"
+
+        # Prevent pyenv build trying (and failing) to update pip
+        export GET_PIP=get-pip-2.6.py
+        echo 'import sys; sys.exit(0)' > ${GET_PIP}
+        GET_PIP=$(realpath $GET_PIP)
+
+        # Build and install Python
+        export CFLAGS="-I$openssl_inc"
+        export LDFLAGS="-L$openssl_lib"
+        export LD_LIBRARY_PATH="$openssl_lib"
+        pyenv install 2.6.9
+        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+    - name: Set up cached Python 2.6
+      if: ${{ steps.cache26.outputs.cache-hit }}
+      run: |
+        export PYENV_ROOT
+        export PATH=$PYENV_ROOT/bin:$PATH
+        eval "$(pyenv init --path)"
+        pyenv local 2.6.9
+        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+    #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
       uses: actions/setup-java@v2
       with:
         java-version: 8
         distribution: 'zulu'
-    - name: Install Jython
+    - name: Setup Jython environment
       if: ${{ matrix.python-impl == 'jython' }}
       run: |
-        wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
-        java -jar jython-installer.jar -s -d "$HOME/jython"
-        echo "$HOME/jython/bin" >> $GITHUB_PATH
-    - name: Install nose
-      if: ${{ matrix.python-impl != 'jython' }}
-      run: pip install nose
+        echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
+    - name: Cache Jython
+      id: cachejy
+      if: ${{ matrix.python-impl == 'jython' }}
+      uses: actions/cache@v3
+      with:
+        # 2.7.3 now available, may solve SNI issue
+        key: jython-2.7.1
+        path: |
+          ${{ env.JYTHON_ROOT }}
+    - name: Install Jython
+      if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      run: |
+        JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+        curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
+        java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
+        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    - name: Set up cached Jython
+      if: ${{ steps.cachejy.outputs.cache-hit }}
+      run: |
+        JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    #-------- pip ---------
+    - name: Set up supported Python ${{ matrix.python-version }} pip
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      # This step may run in either Linux or Windows
+      shell: bash
+      run: |
+        echo "$PATH"
+        echo "$PYTHONHOME"
+        # curl is available on both Windows and Linux, -L follows redirects, -O gets name
+        python -m ensurepip || python -m pip --version || { \
+          get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
+          curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
+          python get-pip.py; }
+    - name: Set up other Python ${{ matrix.python-version }} pip
+      if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+      shell: bash
+      run: |
+        # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl
+        # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl
+        python -m pip --version || { \
+          curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
+          curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
+          python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
+
+    #-------- nose --------
+    - name: Install nose for Python ${{ matrix.python-version }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      shell: bash
+      run: |
+        echo "$PATH"
+        echo "$PYTHONHOME"
+        python --version
+        python -m pip --version
+        python -m pip nose --version || python -m pip install nose
+    - name: Install nose for other Python ${{ matrix.python-version }}
+      if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+      shell: bash
+      run: |
+        python -m pip nose --version || { \
+          curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
+          python --version; \
+          printf '%s\n' \
+            'import sys' \
+            'print(sys.path)' \
+            | python -; \
+          python -m pip --version; \
+          python -m pip install nose-1.3.7-py3-none-any.whl; }
     - name: Install nose (Jython)
       if: ${{ matrix.python-impl == 'jython' }}
-      # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+      # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
       run: |
-        wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
-        pip install nose-1.3.7-py2-none-any.whl
+        pip nose --version || { \
+          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
+          pip --version; \
+          pip install nose-1.3.7-py2-none-any.whl; }
+    - name: Set up nosetest test
+      if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
+      shell: bash
+      run: |
+        # define a test to validate the Python version used by nosetests
+        printf '%s\n' \
+          'from __future__ import unicode_literals' \
+          'import sys, os, platform, unittest' \
+          'class TestPython(unittest.TestCase):' \
+          '    def setUp(self):' \
+          '        self.ver = os.environ["PYTHON_VER"].split("-")' \
+          '    def test_python_ver(self):' \
+          '        self.assertEqual(sys.version[:3], self.ver[-1])' \
+          '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
+          '        self.assertIn(self.ver[0], sys.version.lower())' \
+          '    def test_python_impl(self):' \
+          '        self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
+          > test/test_python.py
+    #-------- TESTS -------
     - name: Run tests
+      if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       env:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
-      run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
+        PYTHON_VER: ${{ matrix.python-version }}
+        PYTHON_IMPL: ${{ matrix.python-impl }}
+
+      run: |
+        ./devscripts/run_tests.${{ matrix.run-tests-ext }}
+
   flake8:
     name: Linter
     runs-on: ubuntu-latest
@@ -81,3 +357,4 @@ jobs:
       run: pip install flake8
     - name: Run flake8
       run: flake8 .
+
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index edc19183d..4bddca047 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -6,6 +6,10 @@ import os
 from os.path import dirname as dirn
 import sys
 
+from youtube_dl.compat import compat_register_utf8
+
+compat_register_utf8()
+
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
diff --git a/test/test_execution.py b/test/test_execution.py
index 704e14612..1dee53a0f 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -10,10 +10,13 @@ import os
 import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from youtube_dl.compat import compat_register_utf8
+
 from youtube_dl.utils import encodeArgument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
+compat_register_utf8()
 
 try:
     _DEV_NULL = subprocess.DEVNULL
@@ -25,13 +28,14 @@ class TestExecution(unittest.TestCase):
     def test_import(self):
         subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
 
+    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_module_exec(self):
-        if sys.version_info >= (2, 7):  # Python 2.6 doesn't support package execution
-            subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
         subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
+    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
         p = subprocess.Popen(
             [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index 6c1b7ec91..c7c2252f5 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -15,6 +15,7 @@ IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
     'conf.py',
     'buildserver.py',
+    'get-pip.py',
 ]
 
 IGNORED_DIRS = [
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index e1bd67919..cc8285eba 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
 
 __license__ = 'Public Domain'
 
-import codecs
 import io
 import os
 import random
@@ -17,6 +16,7 @@ from .options import (
 )
 from .compat import (
     compat_getpass,
+    compat_register_utf8,
     compat_shlex_split,
     workaround_optparse_bug9161,
 )
@@ -46,10 +46,8 @@ from .YoutubeDL import YoutubeDL
 
 
 def _real_main(argv=None):
-    # Compatibility fixes for Windows
-    if sys.platform == 'win32':
-        # https://github.com/ytdl-org/youtube-dl/issues/820
-        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+    # Compatibility fix for Windows
+    compat_register_utf8()
 
     workaround_optparse_bug9161()
 
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index fe62caf80..0f4d3756f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -31,13 +31,17 @@ try:
     compat_str, compat_basestring, compat_chr = (
         unicode, basestring, unichr
     )
-    from .casefold import casefold as compat_casefold
-
 except NameError:
     compat_str, compat_basestring, compat_chr = (
         str, str, chr
     )
+
+# casefold
+try:
+    compat_str.casefold
     compat_casefold = lambda s: s.casefold()
+except AttributeError:
+    from .casefold import casefold as compat_casefold
 
 try:
     import collections.abc as compat_collections_abc
@@ -3137,6 +3141,15 @@ else:
     compat_open = open
 
 
+# compat_register_utf8
+def compat_register_utf8():
+    if sys.platform == 'win32':
+        # https://github.com/ytdl-org/youtube-dl/issues/820
+        from codecs import register, lookup
+        register(
+            lambda name: lookup('utf-8') if name == 'cp65001' else None)
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3203,6 +3216,7 @@ __all__ = [
     'compat_print',
     'compat_re_Match',
     'compat_re_Pattern',
+    'compat_register_utf8',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',

From b08a58090635777f1001d5cde2cd141a5565177c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 30 Jun 2023 03:52:39 +0100
Subject: [PATCH 086/156] [workflows/ci.yml] Fix test support for Py 2.6

---
 .github/workflows/ci.yml           | 115 ++++++++++++++++++-----------
 devscripts/make_lazy_extractors.py |   8 +-
 test/test_execution.py             |  16 ++--
 3 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8d8e654fb..ce878c1b1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,9 +6,8 @@ env:
   all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
   main-cpython-versions: 2.7, 3.2, 3.5, 3.9
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
-  cpython-versions: all
-  # test-set: both
-  test-set: core
+  cpython-versions: main
+  test-set: both
 
 on:
   push:
@@ -75,6 +74,10 @@ jobs:
       contents: read
       packages: write
     runs-on: ${{ matrix.os }}
+    env:
+      PIP: python -m pip
+      PIP_DISABLE_PIP_VERSION_CHECK: true
+      PIP_NO_PYTHON_VERSION_WARNING: true
     strategy:
       fail-fast: true
       matrix:
@@ -152,12 +155,14 @@ jobs:
     - name: Set up Python 2.7
       if: ${{ matrix.python-version == '2.7' }}
       # install 2.7
+      shell: bash
       run: |
         sudo apt-get install -y python2 python-is-python2
         echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
     #-------- Python 2.6 --
     - name: Set up Python 2.6 environment
       if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
       run: |
         openssl_name=openssl-1.0.2u
         echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
@@ -178,6 +183,7 @@ jobs:
     - name: Build and set up Python 2.6
       if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
       # dl and build locally
+      shell: bash
       run: |
         # Install build environment
         sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
@@ -203,8 +209,7 @@ jobs:
         # Download PyEnv from its GitHub repository.
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
-        git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT
-        eval "$(pyenv init --path)"
+        git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
 
         # Prevent pyenv build trying (and failing) to update pip
         export GET_PIP=get-pip-2.6.py
@@ -216,17 +221,14 @@ jobs:
         export LDFLAGS="-L$openssl_lib"
         export LD_LIBRARY_PATH="$openssl_lib"
         pyenv install 2.6.9
-        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
-        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
-    - name: Set up cached Python 2.6
-      if: ${{ steps.cache26.outputs.cache-hit }}
+    - name: Locate Python 2.6
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
       run: |
-        export PYENV_ROOT
-        export PATH=$PYENV_ROOT/bin:$PATH
-        eval "$(pyenv init --path)"
-        pyenv local 2.6.9
-        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
-        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
+        echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+        echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
+        echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
     #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
@@ -236,8 +238,10 @@ jobs:
         distribution: 'zulu'
     - name: Setup Jython environment
       if: ${{ matrix.python-impl == 'jython' }}
+      shell: bash
       run: |
         echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
+        echo "PIP=pip" >> "$GITHUB_ENV"
     - name: Cache Jython
       id: cachejy
       if: ${{ matrix.python-impl == 'jython' }}
@@ -249,19 +253,21 @@ jobs:
           ${{ env.JYTHON_ROOT }}
     - name: Install Jython
       if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
         java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
-        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+        echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH"
     - name: Set up cached Jython
       if: ${{ steps.cachejy.outputs.cache-hit }}
+      shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
     #-------- pip ---------
     - name: Set up supported Python ${{ matrix.python-version }} pip
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
       # This step may run in either Linux or Windows
       shell: bash
       run: |
@@ -272,48 +278,66 @@ jobs:
           get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
           curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
           python get-pip.py; }
+    - name: Set up Python 2.6 pip
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        python -m pip --version || { \
+          curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \
+          curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \
+          python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; }
+        # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751
+        echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV"
     - name: Set up other Python ${{ matrix.python-version }} pip
       if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
       shell: bash
       run: |
-        # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl
-        # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl
         python -m pip --version || { \
           curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
           curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
-          python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
-
+          python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
+    #-------- unittest ----
+    - name: Upgrade Unittest for Python 2.6
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        # see pip for Jython
+        $PIP -qq show unittest2 || { \
+          for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
+              "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
+              "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \
+              "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \
+              "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \
+            curl -L -O "https://files.pythonhosted.org/packages/${u}"; \
+            $PIP install ${u##*/}; \
+          done; }
+        # make tests use unittest2
+        for test in ./test/test_*.py; do
+          sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
+        done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
       shell: bash
       run: |
         echo "$PATH"
         echo "$PYTHONHOME"
-        python --version
-        python -m pip --version
-        python -m pip nose --version || python -m pip install nose
-    - name: Install nose for other Python ${{ matrix.python-version }}
+        $PIP -qq show nose || $PIP install nose
+    - name: Install nose for other Python 2
+      if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+        $PIP -qq show nose || { \
+          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
+          $PIP install nose-1.3.7-py2-none-any.whl; }
+    - name: Install nose for other Python 3
       if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
       shell: bash
       run: |
-        python -m pip nose --version || { \
+        $PIP -qq show nose || { \
           curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
-          python --version; \
-          printf '%s\n' \
-            'import sys' \
-            'print(sys.path)' \
-            | python -; \
-          python -m pip --version; \
-          python -m pip install nose-1.3.7-py3-none-any.whl; }
-    - name: Install nose (Jython)
-      if: ${{ matrix.python-impl == 'jython' }}
-      # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
-      run: |
-        pip nose --version || { \
-          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
-          pip --version; \
-          pip install nose-1.3.7-py2-none-any.whl; }
+          $PIP install nose-1.3.7-py3-none-any.whl; }
     - name: Set up nosetest test
       if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       shell: bash
@@ -321,7 +345,11 @@ jobs:
         # define a test to validate the Python version used by nosetests
         printf '%s\n' \
           'from __future__ import unicode_literals' \
-          'import sys, os, platform, unittest' \
+          'import sys, os, platform' \
+          'try:' \
+          '    import unittest2 as unittest' \
+          'except ImportError:' \
+          '    import unittest' \
           'class TestPython(unittest.TestCase):' \
           '    def setUp(self):' \
           '        self.ver = os.environ["PYTHON_VER"].split("-")' \
@@ -340,7 +368,6 @@ jobs:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
         PYTHON_VER: ${{ matrix.python-version }}
         PYTHON_IMPL: ${{ matrix.python-impl }}
-
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
 
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 4bddca047..a8b6ff1b9 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -6,10 +6,6 @@ import os
 from os.path import dirname as dirn
 import sys
 
-from youtube_dl.compat import compat_register_utf8
-
-compat_register_utf8()
-
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
@@ -23,6 +19,10 @@ try:
 except OSError:
     pass
 
+from youtube_dl.compat import compat_register_utf8
+
+compat_register_utf8()
+
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 
diff --git a/test/test_execution.py b/test/test_execution.py
index 1dee53a0f..35e7a5651 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -11,13 +11,12 @@ import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.compat import compat_register_utf8
-
 from youtube_dl.utils import encodeArgument
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
 compat_register_utf8()
 
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
 try:
     _DEV_NULL = subprocess.DEVNULL
 except AttributeError:
@@ -33,21 +32,22 @@ class TestExecution(unittest.TestCase):
         subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
-        subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
+        os.environ['PYTHONIOENCODING'] = 'utf-8'
         p = subprocess.Popen(
-            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)
 
     def test_lazy_extractors(self):
-        lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
+        lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py')
         try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
-            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
         finally:
             for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
                 try:

From f24bc9272e9b74efc4c4af87c862f5f78921d424 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 4 Jul 2023 16:06:21 +0100
Subject: [PATCH 087/156] [Misc] Fixes for 2.6 compatibility

---
 test/test_jsinterp.py   | 10 ++++++----
 test/test_utils.py      |  2 +-
 youtube_dl/YoutubeDL.py |  6 +++++-
 youtube_dl/compat.py    | 12 ++++++++++++
 youtube_dl/jsinterp.py  | 13 ++++++++++++-
 youtube_dl/utils.py     |  3 ++-
 6 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index ecd6ab3c9..91b12f544 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -492,10 +492,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
         ''')
-        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
-                     'groups', 'match', 'pattern', 'scanner',
-                     'search', 'split', 'sub', 'subn'))
-        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
+        attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
+                     'split', 'sub', 'subn'))
+        if sys.version_info >= (2, 7):
+            # documented for 2.6 but may not be found
+            attrs.update(('flags', 'groupindex', 'groups', 'pattern'))
+        self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
diff --git a/test/test_utils.py b/test/test_utils.py
index b85d397d0..5fab05f7c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1612,7 +1612,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',),
                               msg='exceptions in the query function should be caught')
 
         # Test alternative paths
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 98b878fc1..068029d3e 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,7 +25,11 @@ import tokenize
 import traceback
 import random
 
-from ssl import OPENSSL_VERSION
+try:
+    from ssl import OPENSSL_VERSION
+except ImportError:
+    # Must be Python 2.6, should be built against 1.0.2
+    OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
 from string import ascii_letters
 
 from .compat import (
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 0f4d3756f..2554fd1c3 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,10 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
+from __future__ import division
 
 import base64
 import binascii
 import collections
 import ctypes
+import datetime
 import email
 import getpass
 import io
@@ -3150,6 +3152,15 @@ def compat_register_utf8():
             lambda name: lookup('utf-8') if name == 'cp65001' else None)
 
 
+# compat_datetime_timedelta_total_seconds
+try:
+    compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
+except AttributeError:
+    # Py 2.6 has no timedelta.total_seconds(): compute it from the fields (needs the true division imported from __future__)
+    def compat_datetime_timedelta_total_seconds(td):
+        return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3187,6 +3198,7 @@ __all__ = [
     'compat_chr',
     'compat_collections_abc',
     'compat_collections_chain_map',
+    'compat_datetime_timedelta_total_seconds',
     'compat_http_cookiejar',
     'compat_http_cookiejar_Cookie',
     'compat_http_cookies',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 1ba9c3d67..882432b80 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -277,9 +277,20 @@ class JSInterpreter(object):
 
         def __getattr__(self, name):
             self.__instantiate()
+            # make Py 2.6 conform to its lying documentation
+            if name == 'flags':
+                self.flags = self.__flags
+            elif name == 'pattern':
+                self.pattern = self.__pattern_txt
+            elif name in ('groupindex', 'groups'):
+                # copy from the compiled pattern if it has the attribute, else use the documented empty value (dict / int)
+                if hasattr(self.__self, name):
+                    setattr(self, name, getattr(self.__self, name))
+                else:
+                    return {} if name == 'groupindex' else 0
             if hasattr(self, name):
                 return getattr(self, name)
-            return super(JSInterpreter.JS_RegExp, self).__getattr__(name)
+            raise AttributeError('{0} has no attribute named {1}'.format(self, name))
 
         @classmethod
         def regex_flags(cls, expr):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 584581b6a..83f67bd95 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -47,6 +47,7 @@ from .compat import (
     compat_collections_abc,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
+    compat_datetime_timedelta_total_seconds,
     compat_etree_fromstring,
     compat_expanduser,
     compat_html_entities,
@@ -3102,7 +3103,7 @@ def unified_timestamp(date_str, day_first=True):
             pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
+        return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)
 
 
 def determine_ext(url, default_ext='unknown_video'):

From b6dff4073d469cceadb099c00ccbf3bd6fc515a6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:41:32 +0100
Subject: [PATCH 088/156] [core] Revert version display from b8a86dc

---
 youtube_dl/YoutubeDL.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 068029d3e..4e7fd1063 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2378,10 +2378,12 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n'))
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
+        writeln_debug('youtube-dl version ', __version__)
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled\n')
-        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))  # moved down for easier merge
+            writeln_debug('Lazy loading extractors enabled')
+        if ytdl_is_updateable():
+            writeln_debug('Single file build')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],

From f47fdb9564d3ca1c0fa70ed6031148ec908fdc7b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Jul 2023 15:46:22 +0100
Subject: [PATCH 089/156] [utils] Add {expected_type} and Iterable support to
 traverse_obj()

---
 test/test_utils.py  | 153 ++++++++++++++++++++++++++------
 youtube_dl/utils.py | 211 +++++++++++++++++++++++++++++---------------
 2 files changed, 265 insertions(+), 99 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 5fab05f7c..1fc16ed05 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -79,10 +79,12 @@ from youtube_dl.utils import (
     rot47,
     shell_quote,
     smuggle_url,
+    str_or_none,
     str_to_int,
     strip_jsonp,
     strip_or_none,
     subtitles_filename,
+    T,
     timeconvert,
     traverse_obj,
     try_call,
@@ -1566,6 +1568,7 @@ Line 1
         self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
 
     def test_traverse_obj(self):
+        str = compat_str
         _TEST_DATA = {
             100: 100,
             1.2: 1.2,
@@ -1598,8 +1601,8 @@ Line 1
 
         # Test Ellipsis behavior
         self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
-                              (item for item in _TEST_DATA.values() if item is not None),
-                              msg='`...` should give all values except `None`')
+                              (item for item in _TEST_DATA.values() if item not in (None, {})),
+                              msg='`...` should give all non discarded values')
         self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
                               msg='`...` selection for dicts should select all values')
         self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
@@ -1607,13 +1610,51 @@ Line 1
                          msg='nested `...` queries should work')
         self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
                               msg='`...` query result should be flattened')
+        self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
+                         msg='`...` should accept iterables')
 
         # Test function as key
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',),
-                              msg='exceptions in the query function should be caught')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+                              msg='exceptions in the query function should be catched')
+        self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
+                         msg='function key should accept iterables')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a: Ellipsis)
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
+
+        # Test set as key (transformation/type, like `expected_type`)
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
+                         msg='Function in set should be a transformation')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
+                         msg='Type in set should be a type filter')
+        self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
+                         msg='A single set should be wrapped into a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
+                         msg='Transformation function should not raise')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
+                         [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
+                         msg='Function in set should be a transformation')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, set())
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, {str.upper, str})
+
+        # Test `slice` as a key
+        _SLICE_DATA = [0, 1, 2, 3, 4]
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+                         msg='slice on a dictionary should not throw')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+                         msg='slice key should apply slice to sequence')
 
         # Test alternative paths
         self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
@@ -1659,15 +1700,23 @@ Line 1
                          {0: ['https://www.example.com/1', 'https://www.example.com/0']},
                          msg='triple nesting in dict path should be treated as branches')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
-                         msg='remove `None` values when dict key')
+                         msg='remove `None` values when top level dict key fails')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
-                         msg='do not remove `None` values if `default`')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
-                         msg='do not remove empty values when dict key')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
-                         msg='do not remove empty values when dict key and a default')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
-                         msg='if branch in dict key not successful, return `[]`')
+                         msg='use `default` if key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+                         msg='remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
+                         msg='use `default` when dict key and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+                         msg='remove empty values when nested dict key fails')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+                         msg='default to dict if pruned')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='default to dict if pruned and default is given')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
+                         msg='use nested `default` when nested dict key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
+                         msg='remove key if branch in dict key not successful')
 
         # Testing default parameter behavior
         _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
@@ -1691,20 +1740,55 @@ Line 1
                          msg='if branched but not successful return `[]`, not `default`')
         self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
                          msg='if branched but object is empty return `[]`, not `default`')
+        self.assertEqual(traverse_obj(None, Ellipsis), [],
+                         msg='if branched but object is `None` return `[]`, not `default`')
+        self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
+                         msg='if branched but state is `None` return `[]`, not `default`')
+
+        branching_paths = [
+            ('fail', Ellipsis),
+            (Ellipsis, 'fail'),
+            100 * ('fail',) + (Ellipsis,),
+            (Ellipsis,) + 100 * ('fail',),
+        ]
+        for branching_path in branching_paths:
+            self.assertEqual(traverse_obj({}, branching_path), [],
+                             msg='if branched but state is `None`, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+                             msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+                             msg='if branching in last alternative and previous did match, return single value')
+            self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+                             msg='if branching in first alternative and non-branching path does match, return single value')
+            self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
+                             msg='if branching in first alternative and non-branching path does not match, return `default`')
 
         # Testing expected_type behavior
         _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
-                         msg='accept matching `expected_type` type')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
-                         msg='reject non matching `expected_type` type')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
-                         msg='transform type using type function')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
-                                      expected_type=lambda _: 1 / 0), None,
-                         msg='wrap expected_type function in try_call')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
-                         msg='eliminate items that expected_type fails on')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
+                         'str', msg='accept matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
+                         None, msg='reject non matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
+                         '0', msg='transform type using type function')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
+                         None, msg='wrap expected_type function in try_call')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
+                         ['str'], msg='eliminate items that expected_type fails on')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
+                         {0: 100}, msg='type as expected_type should filter dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
+                         {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
+                         1, msg='expected_type should not filter non final dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
+                         {0: {0: 100}}, msg='expected_type should transform deep dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
+                         [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
+        self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
+                         [4], msg='expected_type regression for type matching in tuple branching')
+        self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
+                         [], msg='expected_type regression for type matching in dict result')
 
         # Test get_all behavior
         _GET_ALL_DATA = {'key': [0, 1, 2]}
@@ -1749,14 +1833,23 @@ Line 1
                                       _traverse_string=True), '.',
                          msg='traverse into converted data if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
-                                      _traverse_string=True), list('str'),
-                         msg='`...` branching into string should result in list')
+                                      _traverse_string=True), 'str',
+                         msg='`...` should result in string (same value) if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
+                                      _traverse_string=True), 'sr',
+                         msg='`slice` should result in string if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
+                                      _traverse_string=True), 'str',
+                         msg='function should result in string if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
                                       _traverse_string=True), ['s', 'r'],
-                         msg='branching into string should result in list')
-        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
-                                      _traverse_string=True), list('str'),
-                         msg='function branching into string should result in list')
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
 
         # Test is_user_input behavior
         _IS_USER_INPUT_DATA = {'range8': list(range(8))}
@@ -1793,6 +1886,8 @@ Line 1
                          msg='failing str key on a `re.Match` should return `default`')
         self.assertEqual(traverse_obj(mobj, 8), None,
                          msg='failing int key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
+                         msg='function on a `re.Match` should give group name as well')
 
     def test_get_first(self):
         self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 83f67bd95..dbdbe5f59 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -16,6 +16,7 @@ import email.header
 import errno
 import functools
 import gzip
+import inspect
 import io
 import itertools
 import json
@@ -3881,7 +3882,7 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
         return unrecognized
 
 
-class LazyList(compat_collections_abc.Sequence):
+class LazyList(compat_collections_abc.Iterable):
     """Lazy immutable list from an iterable
     Note that slices of a LazyList are lists and not LazyList"""
 
@@ -4223,10 +4224,16 @@ def multipart_encode(data, boundary=None):
     return out, content_type
 
 
-def variadic(x, allowed_types=(compat_str, bytes, dict)):
-    if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable):
+def is_iterable_like(x, allowed_types=compat_collections_abc.Iterable, blocked_types=NO_DEFAULT):
+    if blocked_types is NO_DEFAULT:
+        blocked_types = (compat_str, bytes, compat_collections_abc.Mapping)
+    return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
+
+
+def variadic(x, allowed_types=NO_DEFAULT):
+    if isinstance(allowed_types, compat_collections_abc.Iterable):
         allowed_types = tuple(allowed_types)
-    return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+    return x if is_iterable_like(x, blocked_types=allowed_types) else (x,)
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
@@ -5993,7 +6000,7 @@ def clean_podcast_url(url):
 
 def traverse_obj(obj, *paths, **kwargs):
     """
-    Safely traverse nested `dict`s and `Sequence`s
+    Safely traverse nested `dict`s and `Iterable`s
 
     >>> obj = [{}, {"key": "value"}]
     >>> traverse_obj(obj, (1, "key"))
@@ -6001,14 +6008,17 @@ def traverse_obj(obj, *paths, **kwargs):
 
     Each of the provided `paths` is tested and the first producing a valid result will be returned.
     The next path will also be tested if the path branched but no results could be found.
-    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
-    A value of None is treated as the absence of a value.
+    Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
+    Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
 
     The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
 
     The keys in the path can be one of:
         - `None`:           Return the current object.
-        - `str`/`int`:      Return `obj[key]`. For `re.Match, return `obj.group(key)`.
+        - `set`:            Requires the only item in the set to be a type or function,
+                            like `{type}`/`{func}`. If a `type`, returns only values
+                            of this type. If a function, returns `func(obj)`.
+        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
         - `slice`:          Branch out and return all values in `obj[key]`.
         - `Ellipsis`:       Branch out and return a list of all values.
         - `tuple`/`list`:   Branch out and return a list of all matching values.
@@ -6016,6 +6026,9 @@ def traverse_obj(obj, *paths, **kwargs):
         - `function`:       Branch out and return values filtered by the function.
                             Read as: `[value for key, value in obj if function(key, value)]`.
                             For `Sequence`s, `key` is the index of the value.
+                            For `Iterable`s, `key` is the enumeration count of the value.
+                            For `re.Match`es, `key` is the group number (0 = full match)
+                            as well as additionally any group names, if given.
         - `dict`            Transform the current object and return a matching dict.
                             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
 
@@ -6024,8 +6037,12 @@ def traverse_obj(obj, *paths, **kwargs):
     @params paths           Paths which to traverse by.
     Keyword arguments:
     @param default          Value to return if the paths do not match.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, depth first. Try to avoid if using nested `dict` keys.
     @param expected_type    If a `type`, only accept final values of this type.
                             If any other callable, try to call the function on each result.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, recursively. This does respect branching paths.
     @param get_all          If `False`, return the first matching result, otherwise all matching ones.
     @param casesense        If `False`, consider string dictionary keys as case insensitive.
 
@@ -6036,12 +6053,15 @@ def traverse_obj(obj, *paths, **kwargs):
     @param _traverse_string  Whether to traverse into objects as strings.
                             If `True`, any non-compatible object will first be
                             converted into a string and then traversed into.
+                            The return value of that path will be a string instead,
+                            not respecting any further branching.
 
 
     @returns                The result of the object traversal.
                             If successful, `get_all=True`, and the path branches at least once,
                             then a list of results is returned instead.
                             A list is always returned if the last path branches and no `default` is given.
+                            If a path ends on a `dict` that result will always be a `dict`.
     """
 
     # parameter defaults
@@ -6055,7 +6075,6 @@ def traverse_obj(obj, *paths, **kwargs):
     # instant compat
     str = compat_str
 
-    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
     casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
 
     if isinstance(expected_type, type):
@@ -6063,128 +6082,180 @@ def traverse_obj(obj, *paths, **kwargs):
     else:
         type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
 
+    def lookup_or_none(v, k, getter=None):
+        try:
+            return getter(v, k) if getter else v[k]
+        except IndexError:
+            return None
+
     def from_iterable(iterables):
         # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
         for it in iterables:
             for item in it:
                 yield item
 
-    def apply_key(key, obj):
-        if obj is None:
-            return
+    def apply_key(key, obj, is_last):
+        branching = False
+
+        if obj is None and _traverse_string:
+            if key is Ellipsis or callable(key) or isinstance(key, slice):
+                branching = True
+                result = ()
+            else:
+                result = None
 
         elif key is None:
-            yield obj
+            result = obj
+
+        elif isinstance(key, set):
+            assert len(key) == 1, 'Set should only be used to wrap a single item'
+            item = next(iter(key))
+            if isinstance(item, type):
+                result = obj if isinstance(obj, item) else None
+            else:
+                result = try_call(item, args=(obj,))
 
         elif isinstance(key, (list, tuple)):
-            for branch in key:
-                _, result = apply_path(obj, branch)
-                for item in result:
-                    yield item
+            branching = True
+            result = from_iterable(
+                apply_path(obj, branch, is_last)[0] for branch in key)
 
         elif key is Ellipsis:
-            result = []
+            branching = True
             if isinstance(obj, compat_collections_abc.Mapping):
                 result = obj.values()
-            elif is_sequence(obj):
+            elif is_iterable_like(obj):
                 result = obj
             elif isinstance(obj, compat_re_Match):
                 result = obj.groups()
             elif _traverse_string:
+                branching = False
                 result = str(obj)
-            for item in result:
-                yield item
+            else:
+                result = ()
 
         elif callable(key):
-            if is_sequence(obj):
-                iter_obj = enumerate(obj)
-            elif isinstance(obj, compat_collections_abc.Mapping):
+            branching = True
+            if isinstance(obj, compat_collections_abc.Mapping):
                 iter_obj = obj.items()
+            elif is_iterable_like(obj):
+                iter_obj = enumerate(obj)
             elif isinstance(obj, compat_re_Match):
-                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
+                iter_obj = itertools.chain(
+                    enumerate(itertools.chain((obj.group(),), obj.groups())),
+                    obj.groupdict().items())
             elif _traverse_string:
+                branching = False
                 iter_obj = enumerate(str(obj))
             else:
-                return
-            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
-                yield item
+                iter_obj = ()
+
+            result = (v for k, v in iter_obj if try_call(key, args=(k, v)))
+            if not branching:  # string traversal
+                result = ''.join(result)
 
         elif isinstance(key, dict):
-            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
-            yield dict((k, v if v is not None else default) for k, v in iter_obj
-                       if v is not None or default is not NO_DEFAULT)
+            iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items())
+            result = dict((k, v if v is not None else default) for k, v in iter_obj
+                          if v is not None or default is not NO_DEFAULT) or None
 
         elif isinstance(obj, compat_collections_abc.Mapping):
-            yield (obj.get(key) if casesense or (key in obj)
-                   else next((v for k, v in obj.items() if casefold(k) == key), None))
+            result = (try_call(obj.get, args=(key,))
+                      if casesense or try_call(obj.__contains__, args=(key,))
+                      else next((v for k, v in obj.items() if casefold(k) == key), None))
 
         elif isinstance(obj, compat_re_Match):
+            result = None
             if isinstance(key, int) or casesense:
-                try:
-                    yield obj.group(key)
-                    return
-                except IndexError:
-                    pass
-            if not isinstance(key, str):
-                return
+                result = lookup_or_none(obj, key, getter=compat_re_Match.group)
 
-            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+            elif isinstance(key, str):
+                result = next((v for k, v in obj.groupdict().items()
+                              if casefold(k) == key), None)
 
         else:
-            if _is_user_input:
-                key = (int_or_none(key) if ':' not in key
-                       else slice(*map(int_or_none, key.split(':'))))
+            result = None
+            if isinstance(key, (int, slice)):
+                if is_iterable_like(obj, compat_collections_abc.Sequence):
+                    branching = isinstance(key, slice)
+                    result = lookup_or_none(obj, key)
+                elif _traverse_string:
+                    result = lookup_or_none(str(obj), key)
 
-            if not isinstance(key, (int, slice)):
-                return
+        return branching, result if branching else (result,)
 
-            if not is_sequence(obj):
-                if not _traverse_string:
-                    return
-                obj = str(obj)
+    def lazy_last(iterable):
+        iterator = iter(iterable)
+        prev = next(iterator, NO_DEFAULT)
+        if prev is NO_DEFAULT:
+            return
 
-            try:
-                yield obj[key]
-            except IndexError:
-                pass
+        for item in iterator:
+            yield False, prev
+            prev = item
 
-    def apply_path(start_obj, path):
+        yield True, prev
+
+    def apply_path(start_obj, path, test_type):
         objs = (start_obj,)
         has_branched = False
 
-        for key in variadic(path):
-            if _is_user_input and key == ':':
-                key = Ellipsis
+        key = None
+        for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
+            if _is_user_input and isinstance(key, str):
+                if key == ':':
+                    key = Ellipsis
+                elif ':' in key:
+                    key = slice(*map(int_or_none, key.split(':')))
+                elif int_or_none(key) is not None:
+                    key = int(key)
 
             if not casesense and isinstance(key, str):
                 key = compat_casefold(key)
 
-            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
-                has_branched = True
+            if __debug__ and callable(key):
+                # Verify function signature
+                inspect.getcallargs(key, None, None)
 
-            key_func = functools.partial(apply_key, key)
-            objs = from_iterable(map(key_func, objs))
+            new_objs = []
+            for obj in objs:
+                branching, results = apply_key(key, obj, last)
+                has_branched |= branching
+                new_objs.append(results)
 
-        return has_branched, objs
+            objs = from_iterable(new_objs)
 
-    def _traverse_obj(obj, path, use_list=True):
-        has_branched, results = apply_path(obj, path)
-        results = LazyList(x for x in map(type_test, results) if x is not None)
+        if test_type and not isinstance(key, (dict, list, tuple)):
+            objs = map(type_test, objs)
+
+        return objs, has_branched, isinstance(key, dict)
+
+    def _traverse_obj(obj, path, allow_empty, test_type):
+        results, has_branched, is_dict = apply_path(obj, path, test_type)
+        results = LazyList(x for x in results if x not in (None, {}))
 
         if get_all and has_branched:
-            return results.exhaust() if results or use_list else None
+            if results:
+                return results.exhaust()
+            if allow_empty:
+                return [] if default is NO_DEFAULT else default
+            return None
 
-        return results[0] if results else None
+        return results[0] if results else {} if allow_empty and is_dict else None
 
     for index, path in enumerate(paths, 1):
-        use_list = default is NO_DEFAULT and index == len(paths)
-        result = _traverse_obj(obj, path, use_list)
+        result = _traverse_obj(obj, path, index == len(paths), True)
         if result is not None:
             return result
 
     return None if default is NO_DEFAULT else default
 
 
+def T(x):
+    """ For use in yt-dl instead of {type} or set((type,)) """
+    return set((x,))
+
+
 def get_first(obj, keys, **kwargs):
     return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
 

From d5ef405c5d533c85cebd205a5b7958614c7013f3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:45:31 +0100
Subject: [PATCH 090/156] [core] Align error reporting methods with yt-dlp

---
 test/helper.py          |  3 ++-
 test/test_YoutubeDL.py  | 10 ++--------
 youtube_dl/YoutubeDL.py | 39 ++++++++++++++++++++++++++++++++-------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/test/helper.py b/test/helper.py
index 883b2e877..e3314b03e 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -72,7 +72,8 @@ class FakeYDL(YoutubeDL):
     def to_screen(self, s, skip_eol=None):
         print(s)
 
-    def trouble(self, s, tb=None):
+    def trouble(self, *args, **kwargs):
+        s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message')
         raise Exception(s)
 
     def download(self, x):
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index f8c8e619c..60780b8a7 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -930,17 +930,11 @@ class TestYoutubeDL(unittest.TestCase):
     # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
     def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
 
-        class _YDL(YDL):
-            def __init__(self, *args, **kwargs):
-                super(_YDL, self).__init__(*args, **kwargs)
-
-            def trouble(self, s, tb=None):
-                pass
-
-        ydl = _YDL({
+        ydl = YDL({
             'format': 'extra',
             'ignoreerrors': True,
         })
+        ydl.trouble = lambda *_, **__: None
 
         class VideoIE(InfoExtractor):
             _VALID_URL = r'video:(?P<id>\d+)'
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4e7fd1063..1435754c2 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -582,7 +582,7 @@ class YoutubeDL(object):
         if self.params.get('cookiefile') is not None:
             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 
-    def trouble(self, message=None, tb=None):
+    def trouble(self, *args, **kwargs):
         """Determine action to take when a download problem appears.
 
         Depending on if the downloader has been configured to ignore
@@ -591,6 +591,11 @@ class YoutubeDL(object):
 
         tb, if given, is additional traceback information.
         """
+        # message=None, tb=None, is_error=True
+        message = args[0] if len(args) > 0 else kwargs.get('message', None)
+        tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
+        is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
+
         if message is not None:
             self.to_stderr(message)
         if self.params.get('verbose'):
@@ -603,7 +608,10 @@ class YoutubeDL(object):
                 else:
                     tb_data = traceback.format_list(traceback.extract_stack())
                     tb = ''.join(tb_data)
-            self.to_stderr(tb)
+            if tb:
+                self.to_stderr(tb)
+        if not is_error:
+            return
         if not self.params.get('ignoreerrors', False):
             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                 exc_info = sys.exc_info()[1].exc_info
@@ -612,11 +620,18 @@ class YoutubeDL(object):
             raise DownloadError(message, exc_info)
         self._download_retcode = 1
 
-    def report_warning(self, message):
+    def report_warning(self, message, only_once=False, _cache={}):
         '''
         Print the message to stderr, it will be prefixed with 'WARNING:'
         If stderr is a tty file the 'WARNING:' will be colored
         '''
+        if only_once:
+            m_hash = hash((self, message))
+            m_cnt = _cache.setdefault(m_hash, 0)
+            _cache[m_hash] = m_cnt + 1
+            if m_cnt > 0:
+                return
+
         if self.params.get('logger') is not None:
             self.params['logger'].warning(message)
         else:
@@ -629,7 +644,7 @@ class YoutubeDL(object):
             warning_message = '%s %s' % (_msg_header, message)
             self.to_stderr(warning_message)
 
-    def report_error(self, message, tb=None):
+    def report_error(self, message, *args, **kwargs):
         '''
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
@@ -638,8 +653,18 @@ class YoutubeDL(object):
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
-        error_message = '%s %s' % (_msg_header, message)
-        self.trouble(error_message, tb)
+        error_message = '%s %s' % (_msg_header, message)
+        self.trouble(error_message, *args, **kwargs)  # message first, so a positional tb/is_error keep their slots
+
+    def report_unscoped_cookies(self, *args, **kwargs):
+        # message=None, tb=False, is_error=False
+        if len(args) <= 2:
+            kwargs.setdefault('is_error', False)
+            if len(args) <= 0:
+                kwargs.setdefault(
+                    'message',
+                    'Unscoped cookies are not allowed: please specify some sort of scoping')
+        self.report_error(*args, **kwargs)
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
@@ -835,7 +860,7 @@ class YoutubeDL(object):
                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                 self.report_error(msg)
             except ExtractorError as e:  # An error we somewhat expected
-                self.report_error(compat_str(e), e.format_traceback())
+                self.report_error(compat_str(e), tb=e.format_traceback())
             except MaxDownloadsReached:
                 raise
             except Exception as e:

From 1720c04dc56fa0d2caa0a455b1acbd569347482e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jul 2023 20:47:58 +0100
Subject: [PATCH 091/156] [test] Make skipped tests in test_execution work with
 Py 2.6

---
 test/test_execution.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/test_execution.py b/test/test_execution.py
index 35e7a5651..ae59e562a 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -24,21 +24,24 @@ except AttributeError:
 
 
 class TestExecution(unittest.TestCase):
+    def setUp(self):
+        self.module = 'youtube_dl'
+        if sys.version_info < (2, 7):
+            self.module += '.__main__'
+
     def test_import(self):
         subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
 
-    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_module_exec(self):
-        subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
         subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
-    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
         os.environ['PYTHONIOENCODING'] = 'utf-8'
         p = subprocess.Popen(
-            [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'],
+            [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)

From 648dc5304cb2476592ff142988b8c62675011fcc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:51:38 +0100
Subject: [PATCH 092/156] [compat] Add Request and HTTPClient compat for
 redirect

* support `method` parameter of `Request.__init__`  (Py 2 and old Py 3)
* support `getcode` method of compat_http_client.HTTPResponse (Py 2)
---
 youtube_dl/compat.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 2554fd1c3..cd11ba5aa 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,7 @@ import socket
 import struct
 import subprocess
 import sys
+import types
 import xml.etree.ElementTree
 
 # naming convention
@@ -55,6 +56,22 @@ try:
 except ImportError:  # Python 2
     import urllib2 as compat_urllib_request
 
+# Also fix up lack of method arg in old Pythons
+try:
+    _req = compat_urllib_request.Request
+    _req('http://127.0.0.1', method='GET')
+except TypeError:
+    class _request(object):
+        def __new__(cls, url, *args, **kwargs):
+            method = kwargs.pop('method', None)
+            r = _req(url, *args, **kwargs)
+            if method:
+                r.get_method = types.MethodType(lambda _: method, r)
+            return r
+
+    compat_urllib_request.Request = _request
+
+
 try:
     import urllib.error as compat_urllib_error
 except ImportError:  # Python 2
@@ -79,6 +96,12 @@ try:
 except ImportError:  # Python 2
     import urllib as compat_urllib_response
 
+try:
+    compat_urllib_response.addinfourl.status
+except AttributeError:
+    # .getcode() is deprecated in Py 3.
+    compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
+
 try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
@@ -2360,6 +2383,11 @@ try:
     import http.client as compat_http_client
 except ImportError:  # Python 2
     import httplib as compat_http_client
+try:
+    compat_http_client.HTTPResponse.getcode
+except AttributeError:
+    # Py < 3.1
+    compat_http_client.HTTPResponse.getcode = lambda self: self.status
 
 try:
     from urllib.error import HTTPError as compat_HTTPError

From 46fde7caeeab13a6277aab22a0e8a29e10c30cc3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 7 Jun 2023 14:51:50 +0100
Subject: [PATCH 093/156] [core] Update redirect handling from yt-dlp

* Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094
* add test that redirected `POST` loses its `Content-Type`
---
 test/test_http.py   | 489 +++++++++++++++++++++++++++++++++++++++-----
 youtube_dl/utils.py |  74 ++++---
 2 files changed, 484 insertions(+), 79 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 487a9bc77..1a65df9e0 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,33 +8,160 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import gzip
+import io
+import ssl
+import tempfile
+import threading
+import zlib
+
+# avoid deprecated alias assertRaisesRegexp
+if hasattr(unittest.TestCase, 'assertRaisesRegex'):
+    unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
+
+try:
+    import brotli
+except ImportError:
+    brotli = None
+try:
+    from urllib.request import pathname2url
+except ImportError:
+    from urllib import pathname2url
+
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_server,
+    compat_str as str,
+    compat_urllib_error,
+    compat_urllib_HTTPError,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+from youtube_dl.utils import (
+    sanitized_Request,
+    urlencode_postdata,
+)
+
 from test.helper import (
+    FakeYDL,
     FakeLogger,
     http_server_port,
 )
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server, compat_urllib_request
-import ssl
-import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    protocol_version = 'HTTP/1.1'
+
+    # work-around old/new -style class inheritance
+    def super(self, meth_name, *args, **kwargs):
+        from types import MethodType
+        try:
+            super()
+            fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
+        except TypeError:
+            fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
+        self.super = MethodType(fn, self)
+        return self.super(meth_name, *args, **kwargs)
+
     def log_message(self, format, *args):
         pass
 
+    def _headers(self):
+        payload = str(self.headers).encode('utf-8')
+        self.send_response(200)
+        self.send_header('Content-Type', 'application/json')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _redirect(self):
+        self.send_response(int(self.path[len('/redirect_'):]))
+        self.send_header('Location', '/method')
+        self.send_header('Content-Length', '0')
+        self.end_headers()
+
+    def _method(self, method, payload=None):
+        self.send_response(200)
+        self.send_header('Content-Length', str(len(payload or '')))
+        self.send_header('Method', method)
+        self.end_headers()
+        if payload:
+            self.wfile.write(payload)
+
+    def _status(self, status):
+        payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')
+        self.send_response(int(status))
+        self.send_header('Content-Type', 'text/html; charset=utf-8')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _read_data(self):
+        if 'Content-Length' in self.headers:
+            return self.rfile.read(int(self.headers['Content-Length']))
+
+    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+        return '{0}://{1}:{2}/{3}'.format(
+            scheme, host,
+            port if port is not None
+            else http_server_port(self.server), path)
+
+    def do_POST(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('POST', data)
+        elif self.path.startswith('/headers'):
+            self._headers()
+        else:
+            self._status(404)
+
+    def do_HEAD(self):
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('HEAD')
+        else:
+            self._status(404)
+
+    def do_PUT(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('PUT', data)
+        else:
+            self._status(404)
+
     def do_GET(self):
+
+        def respond(payload=b'<html><video src="/vid.mp4" /></html>',
+                    payload_type='text/html; charset=utf-8',
+                    payload_encoding=None,
+                    resp_code=200):
+            self.send_response(resp_code)
+            self.send_header('Content-Type', payload_type)
+            if payload_encoding:
+                self.send_header('Content-Encoding', payload_encoding)
+            self.send_header('Content-Length', str(len(payload)))  # required for persistent connections
+            self.end_headers()
+            self.wfile.write(payload)
+
+        def gzip_compress(p):
+            buf = io.BytesIO()
+            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+                f.write(p)
+            return buf.getvalue()
+
         if self.path == '/video.html':
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/html; charset=utf-8')
-            self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+            respond()
         elif self.path == '/vid.mp4':
-            self.send_response(200)
-            self.send_header('Content-Type', 'video/mp4')
-            self.end_headers()
-            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+            respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
         elif self.path == '/302':
             if sys.version_info[0] == 3:
                 # XXX: Python 3 http server does not allow non-ASCII header values
@@ -42,60 +169,284 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
                 self.end_headers()
                 return
 
-            new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
+            new_url = self._test_url('中文.html')
             self.send_response(302)
             self.send_header(b'Location', new_url.encode('utf-8'))
             self.end_headers()
         elif self.path == '/%E4%B8%AD%E6%96%87.html':
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            respond()
+        elif self.path == '/%c7%9f':
+            respond()
+        elif self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('GET')
+        elif self.path.startswith('/headers'):
+            self._headers()
+        elif self.path == '/trailing_garbage':
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            compressed = gzip_compress(payload) + b'trailing garbage'
+            respond(compressed, payload_encoding='gzip')
+        elif self.path == '/302-non-ascii-redirect':
+            new_url = self._test_url('中文.html')
+            # actually respond with permanent redirect
+            self.send_response(301)
+            self.send_header('Location', new_url)
+            self.send_header('Content-Length', '0')
             self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+        elif self.path == '/content-encoding':
+            encodings = self.headers.get('ytdl-encoding', '')
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+                if encoding == 'br' and brotli:
+                    payload = brotli.compress(payload)
+                elif encoding == 'gzip':
+                    payload = gzip_compress(payload)
+                elif encoding == 'deflate':
+                    payload = zlib.compress(payload)
+                elif encoding == 'unsupported':
+                    payload = b'raw'
+                    break
+                else:
+                    self._status(415)
+                    return
+            respond(payload, payload_encoding=encodings)
+
         else:
-            assert False
+            self._status(404)
+
+    def send_header(self, keyword, value):
+        """
+        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
+        This is against what is defined in RFC 3986, but we need to test that we support this
+        since some sites incorrectly do this.
+        """
+        if keyword.lower() == 'connection':
+            return self.super('send_header', keyword, value)
+
+        if not hasattr(self, '_headers_buffer'):
+            self._headers_buffer = []
+
+        self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
+
+    def end_headers(self):
+        if hasattr(self, '_headers_buffer'):
+            self.wfile.write(b''.join(self._headers_buffer))
+            self._headers_buffer = []
+        self.super('end_headers')
 
 
 class TestHTTP(unittest.TestCase):
     def setUp(self):
-        self.httpd = compat_http_server.HTTPServer(
+        # HTTP server
+        self.http_httpd = compat_http_server.HTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        self.http_port = http_server_port(self.http_httpd)
 
-    def test_unicode_path_redirection(self):
-        # XXX: Python 3 http server does not allow non-ASCII header values
-        if sys.version_info[0] == 3:
-            return
+        self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
+        self.http_server_thread.daemon = True
+        self.http_server_thread.start()
 
-        ydl = YoutubeDL({'logger': FakeLogger()})
-        r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
+        try:
+            from http.server import ThreadingHTTPServer
+        except ImportError:
+            try:
+                from socketserver import ThreadingMixIn
+            except ImportError:
+                from SocketServer import ThreadingMixIn
 
+            class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
+                pass
 
-class TestHTTPS(unittest.TestCase):
-    def setUp(self):
+        # HTTPS server
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
-        self.httpd = compat_http_server.HTTPServer(
+        self.https_httpd = ThreadingHTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
-        self.httpd.socket = ssl.wrap_socket(
-            self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        try:
+            sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+            sslctx.verify_mode = ssl.CERT_NONE
+            sslctx.check_hostname = False
+            sslctx.load_cert_chain(certfn, None)
+            self.https_httpd.socket = sslctx.wrap_socket(
+                self.https_httpd.socket, server_side=True)
+        except AttributeError:
+            self.https_httpd.socket = ssl.wrap_socket(
+                self.https_httpd.socket, certfile=certfn, server_side=True)
+
+        self.https_port = http_server_port(self.https_httpd)
+        self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
+        self.https_server_thread.daemon = True
+        self.https_server_thread.start()
+
+    def tearDown(self):
+
+        def closer(svr):
+            def _closer():
+                svr.shutdown()
+                svr.server_close()
+            return _closer
+
+        shutdown_thread = threading.Thread(target=closer(self.http_httpd))
+        shutdown_thread.start()
+        self.http_server_thread.join(2.0)
+
+        shutdown_thread = threading.Thread(target=closer(self.https_httpd))
+        shutdown_thread.start()
+        self.https_server_thread.join(2.0)
+
+    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+        return '{0}://{1}:{2}/{3}'.format(
+            scheme, host,
+            port if port is not None
+            else self.https_port if scheme == 'https'
+            else self.http_port, path)
 
     def test_nocheckcertificate(self):
-        if sys.version_info >= (2, 7, 9):  # No certificate checking anyways
-            ydl = YoutubeDL({'logger': FakeLogger()})
-            self.assertRaises(
-                Exception,
-                ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+        with FakeYDL({'logger': FakeLogger()}) as ydl:
+            with self.assertRaises(compat_urllib_error.URLError):
+                ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
 
-        ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
-        r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
+            r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
+            self.assertEqual(r.getcode(), 200)
+            r.close()
+
+    def test_percent_encode(self):
+        with FakeYDL() as ydl:
+            # Unicode characters should be encoded with uppercase percent-encoding
+            res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
+            self.assertEqual(res.getcode(), 200)
+            res.close()
+            # don't normalize existing percent encodings
+            res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
+            self.assertEqual(res.getcode(), 200)
+            res.close()
+
+    def test_unicode_path_redirection(self):
+        with FakeYDL() as ydl:
+            r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
+            self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
+            r.close()
+
+    def test_redirect(self):
+        with FakeYDL() as ydl:
+            def do_req(redirect_status, method, check_no_content=False):
+                data = b'testdata' if method in ('POST', 'PUT') else None
+                res = ydl.urlopen(sanitized_Request(
+                    self._test_url('redirect_{0}'.format(redirect_status)),
+                    method=method, data=data))
+                if check_no_content:
+                    self.assertNotIn('Content-Type', res.headers)
+                return res.read().decode('utf-8'), res.headers.get('method', '')
+            # A 303 must either use GET or HEAD for subsequent request
+            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
+            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
+
+            # 301 and 302 turn POST only into a GET, with no Content-Type
+            self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
+            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
+            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
+            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
+
+            # 307 and 308 should not change method
+            for m in ('POST', 'PUT'):
+                self.assertEqual(do_req(307, m), ('testdata', m))
+                self.assertEqual(do_req(308, m), ('testdata', m))
+
+            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
+
+            # These should not redirect and instead raise an HTTPError
+            for code in (300, 304, 305, 306):
+                with self.assertRaises(compat_urllib_HTTPError):
+                    do_req(code, 'GET')
+
+    def test_content_type(self):
+        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
+        with FakeYDL({'nocheckcertificate': True}) as ydl:
+            # method should be auto-detected as POST
+            r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
+
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+            # test http
+            r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+    def test_cookiejar(self):
+        with FakeYDL() as ydl:
+            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+                0, 'test', 'ytdl', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
+            self.assertIn(b'Cookie: test=ytdl', data)
+
+    def test_no_compression_compat_header(self):
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('headers'),
+                    headers={'Youtubedl-no-compression': True})).read()
+            self.assertIn(b'Accept-Encoding: identity', data)
+            self.assertNotIn(b'youtubedl-no-compression', data.lower())
+
+    def test_gzip_trailing_garbage(self):
+        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
+        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
+            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
+
+    def __test_compression(self, encoding):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('content-encoding'),
+                    headers={'ytdl-encoding': encoding}))
+            self.assertEqual(res.headers.get('Content-Encoding'), encoding)
+            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    @unittest.skipUnless(brotli, 'brotli support is not installed')
+    @unittest.expectedFailure
+    def test_brotli(self):
+        self.__test_compression('br')
+
+    @unittest.expectedFailure
+    def test_deflate(self):
+        self.__test_compression('deflate')
+
+    @unittest.expectedFailure
+    def test_gzip(self):
+        self.__test_compression('gzip')
+
+    @unittest.expectedFailure  # not yet implemented
+    def test_multiple_encodings(self):
+        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
+        with FakeYDL() as ydl:
+            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+                res = ydl.urlopen(
+                    sanitized_Request(
+                        self._test_url('content-encoding'),
+                        headers={'ytdl-encoding': pair}))
+                self.assertEqual(res.headers.get('Content-Encoding'), pair)
+                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_unsupported_encoding(self):
+        # it should return the raw content
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('content-encoding'),
+                    headers={'ytdl-encoding': 'unsupported'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
+            self.assertEqual(res.read(), b'raw')
 
 
 def _build_proxy_handler(name):
@@ -109,7 +460,7 @@ def _build_proxy_handler(name):
             self.send_response(200)
             self.send_header('Content-Type', 'text/plain; charset=utf-8')
             self.end_headers()
-            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+            self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
     return HTTPTestRequestHandler
 
 
@@ -129,10 +480,30 @@ class TestProxy(unittest.TestCase):
         self.geo_proxy_thread.daemon = True
         self.geo_proxy_thread.start()
 
+    def tearDown(self):
+
+        def closer(svr):
+            def _closer():
+                svr.shutdown()
+                svr.server_close()
+            return _closer
+
+        shutdown_thread = threading.Thread(target=closer(self.proxy))
+        shutdown_thread.start()
+        self.proxy_thread.join(2.0)
+
+        shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
+        shutdown_thread.start()
+        self.geo_proxy_thread.join(2.0)
+
+    def _test_proxy(self, host='127.0.0.1', port=None):
+        return '{0}:{1}'.format(
+            host, port if port is not None else self.port)
+
     def test_proxy(self):
-        geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+        geo_proxy = self._test_proxy(port=self.geo_port)
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': self._test_proxy(),
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
@@ -146,7 +517,7 @@ class TestProxy(unittest.TestCase):
 
     def test_proxy_with_idn(self):
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': self._test_proxy(),
         })
         url = 'http://中文.tw/'
         response = ydl.urlopen(url).read().decode('utf-8')
@@ -154,5 +525,25 @@ class TestProxy(unittest.TestCase):
         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
 
 
+class TestFileURL(unittest.TestCase):
+    # See https://github.com/ytdl-org/youtube-dl/issues/8227
+    def test_file_urls(self):
+        tf = tempfile.NamedTemporaryFile(delete=False)
+        tf.write(b'foobar')
+        tf.close()
+        url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
+        with FakeYDL() as ydl:
+            self.assertRaisesRegexp(
+                compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
+        # not yet implemented
+        """
+        with FakeYDL({'enable_file_urls': True}) as ydl:
+            res = ydl.urlopen(url)
+            self.assertEqual(res.read(), b'foobar')
+            res.close()
+        """
+        os.unlink(tf.name)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index dbdbe5f59..58c710b08 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -41,7 +41,6 @@ import zlib
 from .compat import (
     compat_HTMLParseError,
     compat_HTMLParser,
-    compat_HTTPError,
     compat_basestring,
     compat_casefold,
     compat_chr,
@@ -64,6 +63,7 @@ from .compat import (
     compat_struct_pack,
     compat_struct_unpack,
     compat_urllib_error,
+    compat_urllib_HTTPError,
     compat_urllib_parse,
     compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_urlencode,
@@ -2614,7 +2614,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     Part of this code was copied from:
 
-    http://techknack.net/python-urllib2-handlers/
+    http://techknack.net/python-urllib2-handlers/, archived at
+    https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/
 
     Andrew Rowls, the author of that code, agreed to release it to the
     public domain.
@@ -2672,7 +2673,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             req._Request__original = req._Request__original.partition('#')[0]
             req._Request__r_type = req._Request__r_type.partition('#')[0]
 
-        return req
+        # Use the totally undocumented AbstractHTTPHandler per
+        # https://github.com/yt-dlp/yt-dlp/pull/4158
+        return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req)
 
     def http_response(self, req, resp):
         old_resp = resp
@@ -2683,7 +2686,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             try:
                 uncompressed = io.BytesIO(gz.read())
             except IOError as original_ioerror:
-                # There may be junk add the end of the file
+                # There may be junk at the end of the file
                 # See http://stackoverflow.com/q/4928560/35070 for details
                 for i in range(1, 1024):
                     try:
@@ -2710,9 +2713,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             if location:
                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                 if sys.version_info >= (3, 0):
-                    location = location.encode('iso-8859-1').decode('utf-8')
-                else:
-                    location = location.decode('utf-8')
+                    location = location.encode('iso-8859-1')
+                location = location.decode('utf-8')
                 location_escaped = escape_url(location)
                 if location != location_escaped:
                     del resp.headers['Location']
@@ -2940,17 +2942,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
     The code is based on HTTPRedirectHandler implementation from CPython [1].
 
-    This redirect handler solves two issues:
-     - ensures redirect URL is always unicode under python 2
-     - introduces support for experimental HTTP response status code
-       308 Permanent Redirect [2] used by some sites [3]
+    This redirect handler fixes and improves the logic to better align with RFC7231
+    and what browsers tend to do [2][3]
 
     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
-    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
-    3. https://github.com/ytdl-org/youtube-dl/issues/28768
+    2. https://datatracker.ietf.org/doc/html/rfc7231
+    3. https://github.com/python/cpython/issues/91306
     """
 
-    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+    # Supply possibly missing alias
+    http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
 
     def redirect_request(self, req, fp, code, msg, headers, newurl):
         """Return a Request or None in response to a redirect.
@@ -2962,19 +2963,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         else should try to handle this url.  Return None if you can't
         but another Handler might.
         """
-        m = req.get_method()
-        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
-                 or code in (301, 302, 303) and m == "POST")):
-            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
-        # Strictly (according to RFC 2616), 301 or 302 in response to
-        # a POST MUST NOT cause a redirection without confirmation
-        # from the user (of urllib.request, in this case).  In practice,
-        # essentially all clients do redirect in this case, so we do
-        # the same.
+        if code not in (301, 302, 303, 307, 308):
+            raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp)
+
+        new_method = req.get_method()
+        new_data = req.data
+        remove_headers = []
 
         # On python 2 urlh.geturl() may sometimes return redirect URL
-        # as byte string instead of unicode. This workaround allows
-        # to force it always return unicode.
+        # as a byte string instead of unicode. This workaround forces
+        # it to return unicode.
         if sys.version_info[0] < 3:
             newurl = compat_str(newurl)
 
@@ -2983,13 +2981,29 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
 
-        CONTENT_HEADERS = ("content-length", "content-type")
+        # A 303 must either use GET or HEAD for subsequent request
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
+        if code == 303 and req.get_method() != 'HEAD':
+            new_method = 'GET'
+        # 301 and 302 redirects are commonly turned into a GET from a POST
+        # for subsequent requests by browsers, so we'll do the same.
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
+        elif code in (301, 302) and req.get_method() == 'POST':
+            new_method = 'GET'
+
+        # only remove payload if method changed (e.g. POST to GET)
+        if new_method != req.get_method():
+            new_data = None
+            remove_headers.extend(['Content-Length', 'Content-Type'])
+
         # NB: don't use dict comprehension for python 2.6 compatibility
-        newheaders = dict((k, v) for k, v in req.headers.items()
-                          if k.lower() not in CONTENT_HEADERS)
+        new_headers = dict((k, v) for k, v in req.header_items()
+                           if k.lower() not in remove_headers)
+
         return compat_urllib_request.Request(
-            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
-            unverifiable=True)
+            newurl, headers=new_headers, origin_req_host=req.origin_req_host,
+            unverifiable=True, method=new_method, data=new_data)
 
 
 def extract_timezone(date_str):

From b383be98874d4dded67ee8a679fae30340722709 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 7 Jun 2023 19:38:54 +0100
Subject: [PATCH 094/156] [core] Remove `Cookie` header on redirect to prevent
 leaks

Adapted from yt-dlp/yt-dlp-ghsa-v8mc-9377-rwjj/pull/1/commits/101caac
Thx coletdjnz
---
 test/test_http.py   | 32 ++++++++++++++++++++++++++++++--
 youtube_dl/utils.py |  8 ++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 1a65df9e0..cd180b51f 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -183,6 +183,11 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             self._method('GET')
         elif self.path.startswith('/headers'):
             self._headers()
+        elif self.path.startswith('/308-to-headers'):
+            self.send_response(308)
+            self.send_header('Location', '/headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path == '/trailing_garbage':
             payload = b'<html><video src="/vid.mp4" /></html>'
             compressed = gzip_compress(payload) + b'trailing garbage'
@@ -385,8 +390,31 @@ class TestHTTP(unittest.TestCase):
             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
                 0, 'test', 'ytdl', None, False, '127.0.0.1', True,
                 False, '/headers', True, False, None, False, None, None, {}))
-            data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
-            self.assertIn(b'Cookie: test=ytdl', data)
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'))).read().decode('utf-8')
+            self.assertIn('Cookie: test=ytdl', data)
+
+    def test_passed_cookie_header(self):
+        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+        with FakeYDL() as ydl:
+            # Specified Cookie header should be used
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertIn('Cookie: test=test', res)
+
+            # Specified Cookie header should be removed on any redirect
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=test', res)
+
+            # Specified Cookie header should override global cookiejar for that request
+            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=ytdlp', data)
+            self.assertIn('Cookie: test=test', data)
 
     def test_no_compression_compat_header(self):
         with FakeYDL() as ydl:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 58c710b08..c21cd3687 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2968,7 +2968,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         new_method = req.get_method()
         new_data = req.data
-        remove_headers = []
 
         # On python 2 urlh.geturl() may sometimes return redirect URL
         # as a byte string instead of unicode. This workaround forces
@@ -2981,6 +2980,11 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
 
+        # Technically the Cookie header should be in unredirected_hdrs;
+        # however in practice some may set it in normal headers anyway.
+        # We will remove it here to prevent any leaks.
+        remove_headers = ['Cookie']
+
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
         if code == 303 and req.get_method() != 'HEAD':
@@ -2999,7 +3003,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         # NB: don't use dict comprehension for python 2.6 compatibility
         new_headers = dict((k, v) for k, v in req.header_items()
-                           if k.lower() not in remove_headers)
+                           if k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,

From 3801d36416d6e3e6031dc4fcac01891ce7ddb55b Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Tue, 4 Jul 2023 14:03:39 -0500
Subject: [PATCH 095/156] [utils] `YoutubeDLCookieJar`: Add `get_cookie_header`
 and `get_cookies_for_url` methods

---
 youtube_dl/utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c21cd3687..ac6c81465 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2912,6 +2912,19 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
                 cookie.expires = None
                 cookie.discard = True
 
+    def get_cookie_header(self, url):
+        """Generate a Cookie HTTP header for a given url"""
+        cookie_req = sanitized_Request(url)
+        self.add_cookie_header(cookie_req)
+        return cookie_req.get_header('Cookie')
+
+    def get_cookies_for_url(self, url):
+        """Generate a list of Cookie objects for a given url"""
+        # Policy `_now` attribute must be set before calling `_cookies_for_request`
+        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
+        self._policy._now = self._now = int(time.time())
+        return self._cookies_for_request(sanitized_Request(url))
+
 
 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     def __init__(self, cookiejar=None):

From 8334ec961b802ad7ef8571b776c5fc727206dc9b Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.xyz>
Date: Tue, 4 Jul 2023 21:41:04 +0200
Subject: [PATCH 096/156] [core] Process header cookies on loading

---
 test/test_YoutubeDL.py          | 185 +++++++++++++++++++++++++++++++-
 test/test_YoutubeDLCookieJar.py |  14 +++
 youtube_dl/YoutubeDL.py         | 182 ++++++++++++++++++++++++++-----
 youtube_dl/downloader/common.py |   9 ++
 4 files changed, 357 insertions(+), 33 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 60780b8a7..6cf555827 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -10,14 +10,30 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import copy
+import json
 
-from test.helper import FakeYDL, assertRegexpMatches
+from test.helper import (
+    FakeYDL,
+    assertRegexpMatches,
+    try_rm,
+)
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_str, compat_urllib_error
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_cookies_SimpleCookie,
+    compat_kwargs,
+    compat_str,
+    compat_urllib_error,
+)
+
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.postprocessor.common import PostProcessor
-from youtube_dl.utils import ExtractorError, match_filter_func
+from youtube_dl.utils import (
+    ExtractorError,
+    match_filter_func,
+    traverse_obj,
+)
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -29,11 +45,14 @@ class YDL(FakeYDL):
         self.msgs = []
 
     def process_info(self, info_dict):
-        self.downloaded_info_dicts.append(info_dict)
+        self.downloaded_info_dicts.append(info_dict.copy())
 
     def to_screen(self, msg):
         self.msgs.append(msg)
 
+    def dl(self, *args, **kwargs):
+        assert False, 'Downloader must not be invoked for test_YoutubeDL'
+
 
 def _make_result(formats, **kwargs):
     res = {
@@ -42,8 +61,9 @@ def _make_result(formats, **kwargs):
         'title': 'testttitle',
         'extractor': 'testex',
         'extractor_key': 'TestEx',
+        'webpage_url': 'http://example.com/watch?v=shenanigans',
     }
-    res.update(**kwargs)
+    res.update(**compat_kwargs(kwargs))
     return res
 
 
@@ -1011,5 +1031,160 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(out_info['release_date'], '20210930')
 
 
+class TestYoutubeDLCookies(unittest.TestCase):
+
+    @staticmethod
+    def encode_cookie(cookie):
+        if not isinstance(cookie, dict):
+            cookie = vars(cookie)
+        for name, value in cookie.items():
+            yield name, compat_str(value)
+
+    @classmethod
+    def comparable_cookies(cls, cookies):
+        # Work around cookiejar cookies not being unicode strings
+        return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies))))
+
+    def assertSameCookies(self, c1, c2, msg=None):
+        return self.assertEqual(
+            *map(self.comparable_cookies, (c1, c2)),
+            msg=msg)
+
+    def assertSameCookieStrings(self, c1, c2, msg=None):
+        return self.assertSameCookies(
+            *map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)),
+            msg=msg)
+
+    def test_header_cookies(self):
+
+        ydl = FakeYDL()
+        ydl.report_warning = lambda *_, **__: None
+
+        def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+            return compat_http_cookiejar_Cookie(
+                version or 0, name, value, None, False,
+                domain, bool(domain), bool(domain), path, bool(path),
+                secure, expires, False, None, None, rest={})
+
+        test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s'))
+
+        def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None):
+            def _test():
+                ydl.cookiejar.clear()
+                ydl._load_cookies(encoded_cookies, autoscope=headers)
+                if headers:
+                    ydl._apply_header_cookies(test_url)
+                data = {'url': test_url}
+                ydl._calc_headers(data)
+                self.assertSameCookies(
+                    cookies, ydl.cookiejar,
+                    'Extracted cookiejar.Cookie is not the same')
+                if not headers:
+                    self.assertSameCookieStrings(
+                        data.get('cookies'), round_trip or encoded_cookies,
+                        msg='Cookie is not the same as round trip')
+                ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+            try:
+                _test()
+            except AssertionError:
+                raise
+            except Exception as e:
+                if not error_re:
+                    raise
+                assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2))
+
+        test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)])
+        test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed')
+        test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [
+            cookie('cookie1', 'value1', domain=test_domain, path='/test'),
+            cookie('cookie2', 'value2', domain=test_domain, path='/')])
+        cookie_kw = compat_kwargs(
+            {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', })
+        test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [
+            cookie('test', 'value', **cookie_kw)])
+        test('test="value; "; path=/test; domain=' + test_domain, [
+            cookie('test', 'value; ', domain=test_domain, path='/test')],
+            round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain))
+        test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)],
+             round_trip='name=""; Domain=' + test_domain)
+        test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True)
+        test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [],
+             headers=True, error_re='Invalid syntax')
+        ydl.report_warning = ydl.report_error
+        test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk')
+
+    def test_infojson_cookies(self):
+        TEST_FILE = 'test_infojson_cookies.info.json'
+        TEST_URL = 'https://example.com/example.mp4'
+        COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+        COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+        ydl = FakeYDL()
+        ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+        def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+            fmt = {'url': TEST_URL}
+            if fmts_header_cookies:
+                fmt['http_headers'] = COOKIE_HEADER
+            if cookies_field:
+                fmt['cookies'] = COOKIES
+            return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+        def test(initial_info, note):
+
+            def failure_msg(why):
+                return ' when '.join((why, note))
+
+            result = {}
+            result['processed'] = ydl.process_ie_result(initial_info)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=failure_msg('No cookies set in cookiejar after initial process'))
+            ydl.cookiejar.clear()
+            with open(TEST_FILE) as infojson:
+                result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+            result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=failure_msg('No cookies set in cookiejar after final process'))
+            ydl.cookiejar.clear()
+            for key in ('processed', 'loaded', 'final'):
+                info = result[key]
+                self.assertIsNone(
+                    traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+                    msg=failure_msg('Cookie header not removed in {0} result'.format(key)))
+                self.assertSameCookieStrings(
+                    traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+                    msg=failure_msg('No cookies field found in {0} result'.format(key)))
+
+        test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+        test(make_info(info_header_cookies=True), 'info_dict header cokies')
+        test(make_info(fmts_header_cookies=True), 'format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+        test(make_info(cookies_field=True), 'cookies format field')
+        test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+        try_rm(TEST_FILE)
+
+    def test_add_headers_cookie(self):
+        def check_for_cookie_header(result):
+            return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+        ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+        ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com
+
+        fmt = {'url': 'https://example.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+        self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+        self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+        fmt = {'url': 'https://wrong.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+        self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+        self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 05f48bd74..4f9dd71ae 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
         # will be ignored
         self.assertFalse(cookiejar._cookies)
 
+    def test_get_cookie_header(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+        self.assertIn('HTTPONLY_COOKIE', header)
+
+    def test_get_cookies_for_url(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
+        self.assertEqual(len(cookies), 2)
+        cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
+        self.assertFalse(cookies)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1435754c2..98d080f43 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
 
 import collections
 import contextlib
+import copy
 import datetime
 import errno
 import fileinput
@@ -34,10 +35,12 @@ from string import ascii_letters
 
 from .compat import (
     compat_basestring,
-    compat_cookiejar,
+    compat_collections_chain_map as ChainMap,
     compat_filter as filter,
     compat_get_terminal_size,
     compat_http_client,
+    compat_http_cookiejar_Cookie,
+    compat_http_cookies_SimpleCookie,
     compat_integer_types,
     compat_kwargs,
     compat_map as map,
@@ -53,6 +56,7 @@ from .compat import (
 from .utils import (
     age_restricted,
     args_to_str,
+    bug_reports_message,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -97,6 +101,7 @@ from .utils import (
     std_headers,
     str_or_none,
     subtitles_filename,
+    traverse_obj,
     UnavailableVideoError,
     url_basename,
     version_tuple,
@@ -376,6 +381,9 @@ class YoutubeDL(object):
         self.params.update(params)
         self.cache = Cache(self)
 
+        self._header_cookies = []
+        self._load_cookies_from_headers(self.params.get('http_headers'))
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
                 self.report_warning(
@@ -870,8 +878,83 @@ class YoutubeDL(object):
                     raise
         return wrapper
 
+    def _remove_cookie_header(self, http_headers):
+        """Filters out `Cookie` header from an `http_headers` dict
+        The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+
+        @param http_headers     An `http_headers` dict from which any `Cookie` header
+                                should be removed, or None
+        """
+        return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
+
+    def _load_cookies(self, data, **kwargs):
+        """Loads cookies from a `Cookie` header
+
+        This tries to work around the security vulnerability of passing cookies to every domain.
+
+        @param data         The Cookie header as a string to load the cookies from
+        @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
+                            If `True`, save cookies for later to be stored in the jar with a limited scope
+                            If a URL, save cookies in the jar with the domain of the URL
+        """
+        # autoscope=True (kw-only)
+        autoscope = kwargs.get('autoscope', True)
+
+        for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
+            if autoscope and any(cookie.values()):
+                raise ValueError('Invalid syntax in Cookie Header')
+
+            domain = cookie.get('domain') or ''
+            expiry = cookie.get('expires')
+            if expiry == '':  # 0 is valid so we check for `''` explicitly
+                expiry = None
+            prepared_cookie = compat_http_cookiejar_Cookie(
+                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+                bool(cookie.get('secure')), expiry, False, None, None, {})
+
+            if domain:
+                self.cookiejar.set_cookie(prepared_cookie)
+            elif autoscope is True:
+                self.report_warning(
+                    'Passing cookies as a header is a potential security risk; '
+                    'they will be scoped to the domain of the downloaded urls. '
+                    'Please consider loading cookies from a file or browser instead.',
+                    only_once=True)
+                self._header_cookies.append(prepared_cookie)
+            elif autoscope:
+                self.report_warning(
+                    'The extractor result contains an unscoped cookie as an HTTP header. '
+                    'If you are specifying an input URL, ' + bug_reports_message(),
+                    only_once=True)
+                self._apply_header_cookies(autoscope, [prepared_cookie])
+            else:
+                self.report_unscoped_cookies()
+
+    def _load_cookies_from_headers(self, headers):
+        self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
+
+    def _apply_header_cookies(self, url, cookies=None):
+        """This method applies stray header cookies to the provided url
+
+        This loads header cookies and scopes them to the domain provided in `url`.
+        While this is not ideal, it helps reduce the risk of them being sent to
+        an unintended destination.
+        """
+        parsed = compat_urllib_parse.urlparse(url)
+        if not parsed.hostname:
+            return
+
+        for cookie in map(copy.copy, cookies or self._header_cookies):
+            cookie.domain = '.' + parsed.hostname
+            self.cookiejar.set_cookie(cookie)
+
     @__handle_extraction_exceptions
     def __extract_info(self, url, ie, download, extra_info, process):
+        # Compat with passing cookies in http headers
+        self._apply_header_cookies(url)
+
         ie_result = ie.extract(url)
         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
             return
@@ -897,7 +980,7 @@ class YoutubeDL(object):
 
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
-        Take the result of the ie(may be modified) and resolve all unresolved
+        Take the result of the ie (may be modified) and resolve all unresolved
         references (URLs, playlist items).
 
         It will also download the videos if 'download'.
@@ -1468,23 +1551,45 @@ class YoutubeDL(object):
         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
         return _build_selector_function(parsed_selector)
 
-    def _calc_headers(self, info_dict):
-        res = std_headers.copy()
+    def _calc_headers(self, info_dict, load_cookies=False):
+        if load_cookies:  # For --load-info-json
+            # load cookies from http_headers in legacy info.json
+            self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
+                               autoscope=info_dict['url'])
+            # load scoped cookies from info.json
+            self._load_cookies(info_dict.get('cookies'), autoscope=False)
 
-        add_headers = info_dict.get('http_headers')
-        if add_headers:
-            res.update(add_headers)
-
-        cookies = self._calc_cookies(info_dict)
+        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
         if cookies:
-            res['Cookie'] = cookies
+            # Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
+            # By convention a cookie name can't be a well-known attribute name
+            # so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
+            encoder = compat_http_cookies_SimpleCookie()
+            values = []
+            attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
+            attributes = tuple([x[0].lower()] + list(x) for x in attributes)
+            for cookie in cookies:
+                _, value = encoder.value_encode(cookie.value)
+                # Py 2 '' --> '', Py 3 '' --> '""'
+                if value == '':
+                    value = '""'
+                values.append('='.join((cookie.name, value)))
+                for attr in attributes:
+                    value = getattr(cookie, attr[0], None)
+                    if value:
+                        values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
+            info_dict['cookies'] = '; '.join(values)
+
+        res = std_headers.copy()
+        res.update(info_dict.get('http_headers') or {})
+        res = self._remove_cookie_header(res)
 
         if 'X-Forwarded-For' not in res:
             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
             if x_forwarded_for_ip:
                 res['X-Forwarded-For'] = x_forwarded_for_ip
 
-        return res
+        return res or None
 
     def _calc_cookies(self, info_dict):
         pr = sanitized_Request(info_dict['url'])
@@ -1663,10 +1768,13 @@ class YoutubeDL(object):
                 format['protocol'] = determine_protocol(format)
             # Add HTTP headers, so that external programs can use them from the
             # json output
-            full_format_info = info_dict.copy()
-            full_format_info.update(format)
-            format['http_headers'] = self._calc_headers(full_format_info)
-        # Remove private housekeeping stuff
+            format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
+
+        # Safeguard against old/insecure infojson when using --load-info-json
+        info_dict['http_headers'] = self._remove_cookie_header(
+            info_dict.get('http_headers') or {}) or None
+
+        # Remove private housekeeping stuff (copied to http_headers in _calc_headers())
         if '__x_forwarded_for_ip' in info_dict:
             del info_dict['__x_forwarded_for_ip']
 
@@ -1927,17 +2035,9 @@ class YoutubeDL(object):
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
 
-        if self.params.get('writeinfojson', False):
-            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
-                self.to_screen('[info] Video description metadata is already present')
-            else:
-                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
-                try:
-                    write_json_file(self.filter_requested_info(info_dict), infofn)
-                except (OSError, IOError):
-                    self.report_error('Cannot write metadata to JSON file ' + infofn)
-                    return
+        self._write_info_json(
+            'video description', info_dict,
+            replace_extension(filename, 'info.json', info_dict.get('ext')))
 
         self._write_thumbnails(info_dict, filename)
 
@@ -1958,7 +2058,11 @@ class YoutubeDL(object):
                         fd.add_progress_hook(ph)
                     if self.params.get('verbose'):
                         self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
-                    return fd.download(name, info)
+
+                    new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
+                    new_info['http_headers'] = self._calc_headers(new_info)
+
+                    return fd.download(name, new_info)
 
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
@@ -2484,7 +2588,7 @@ class YoutubeDL(object):
         opts_proxy = self.params.get('proxy')
 
         if opts_cookiefile is None:
-            self.cookiejar = compat_cookiejar.CookieJar()
+            self.cookiejar = YoutubeDLCookieJar()
         else:
             opts_cookiefile = expand_path(opts_cookiefile)
             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
@@ -2545,6 +2649,28 @@ class YoutubeDL(object):
             encoding = preferredencoding()
         return encoding
 
+    def _write_info_json(self, label, info_dict, infofn, overwrite=None):
+        if not self.params.get('writeinfojson', False):
+            return False
+
+        def msg(fmt, lbl):
+            return fmt % (lbl + ' metadata',)
+
+        if overwrite is None:
+            overwrite = not self.params.get('nooverwrites', False)
+
+        if not overwrite and os.path.exists(encodeFilename(infofn)):
+            self.to_screen(msg('[info] %s is already present', label.title()))
+            return 'exists'
+        else:
+            self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label))
+            try:
+                write_json_file(self.filter_requested_info(info_dict), infofn)
+                return True
+            except (OSError, IOError):
+                self.report_error(msg('Cannot write %s to JSON file ' + infofn, label))
+                return
+
     def _write_thumbnails(self, info_dict, filename):
         if self.params.get('writethumbnail', False):
             thumbnails = info_dict.get('thumbnails')
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index c86ce2aa5..08c98b336 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -13,7 +13,9 @@ from ..utils import (
     error_to_compat_str,
     format_bytes,
     shell_quote,
+    T,
     timeconvert,
+    traverse_obj,
 )
 
 
@@ -339,6 +341,10 @@ class FileDownloader(object):
     def download(self, filename, info_dict):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
+
+        This method filters the `Cookie` header from the info_dict to prevent leaks.
+        Downloaders have their own way of handling cookies.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
         """
 
         nooverwrites_and_exists = (
@@ -373,6 +379,9 @@ class FileDownloader(object):
                     else '%.2f' % sleep_interval))
             time.sleep(sleep_interval)
 
+        info_dict['http_headers'] = dict(traverse_obj(info_dict, (
+            'http_headers', T(dict.items), lambda _, pair: pair[0].lower() != 'cookie'))) or None
+
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):

From 21438a4194376c3a9b1e5c322c825d43a1b03d6e Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Tue, 4 Jul 2023 16:40:56 -0500
Subject: [PATCH 097/156] [downloader/external] Fix cookie support

---
 test/test_downloader_external.py  | 157 ++++++++++++++++++++++++++++--
 youtube_dl/downloader/common.py   |   5 -
 youtube_dl/downloader/external.py | 124 +++++++++++++++++++----
 3 files changed, 256 insertions(+), 30 deletions(-)

diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
index c0239502b..029f9b05f 100644
--- a/test/test_downloader_external.py
+++ b/test/test_downloader_external.py
@@ -12,20 +12,65 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import (
     FakeLogger,
+    FakeYDL,
     http_server_port,
     try_rm,
 )
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
-from youtube_dl.utils import encodeFilename
-from youtube_dl.downloader.external import Aria2pFD
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_server,
+    compat_kwargs,
+)
+from youtube_dl.utils import (
+    encodeFilename,
+    join_nonempty,
+)
+from youtube_dl.downloader.external import (
+    Aria2cFD,
+    Aria2pFD,
+    AxelFD,
+    CurlFD,
+    FFmpegFD,
+    HttpieFD,
+    WgetFD,
+)
 import threading
 
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-
-
 TEST_SIZE = 10 * 1024
 
+TEST_COOKIE = {
+    'version': 0,
+    'name': 'test',
+    'value': 'ytdlp',
+    'port': None,
+    'port_specified': False,
+    'domain': '.example.com',
+    'domain_specified': True,
+    'domain_initial_dot': False,
+    'path': '/',
+    'path_specified': True,
+    'secure': False,
+    'expires': None,
+    'discard': False,
+    'comment': None,
+    'comment_url': None,
+    'rest': {},
+}
+
+TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)
+
+TEST_INFO = {'url': 'http://www.example.com/'}
+
+
+def cookiejar_Cookie(**cookie_args):
+    return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args))
+
+
+def ifExternalFDAvailable(externalFD):
+    return unittest.skipUnless(externalFD.available(),
+                               externalFD.get_basename() + ' not found')
+
 
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
@@ -70,7 +115,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False, 'unrecognised server path'
 
 
-@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
+@ifExternalFDAvailable(Aria2pFD)
 class TestAria2pFD(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
@@ -111,5 +156,103 @@ class TestAria2pFD(unittest.TestCase):
         })
 
 
+@ifExternalFDAvailable(HttpieFD)
+class TestHttpieFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = HttpieFD(ydl, {})
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['http', '--download', '--output', 'test', 'http://www.example.com/'])
+
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['http', '--download', '--output', 'test',
+                 'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE])
+
+
+@ifExternalFDAvailable(AxelFD)
+class TestAxelFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = AxelFD(ydl, {})
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['axel', '-o', 'test', '--', 'http://www.example.com/'])
+
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE,
+                 '--max-redirect=0', '--', 'http://www.example.com/'])
+
+
+@ifExternalFDAvailable(WgetFD)
+class TestWgetFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = WgetFD(ydl, {})
+            self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+            # Test cookiejar tempfile arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(CurlFD)
+class TestCurlFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = CurlFD(ydl, {})
+            self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+            self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(Aria2cFD)
+class TestAria2cFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = Aria2cFD(ydl, {})
+            downloader._make_cmd('test', TEST_INFO)
+            self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
+
+            # Test cookiejar tempfile arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            cmd = downloader._make_cmd('test', TEST_INFO)
+            self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
+
+
+@ifExternalFDAvailable(FFmpegFD)
+class TestFFmpegFD(unittest.TestCase):
+    _args = []
+
+    def _test_cmd(self, args):
+        self._args = args
+
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = FFmpegFD(ydl, {})
+            downloader._debug_cmd = self._test_cmd
+            info_dict = TEST_INFO.copy()
+            info_dict['ext'] = 'mp4'
+
+            downloader._call_downloader('test', info_dict)
+            self.assertEqual(self._args, [
+                'ffmpeg', '-y', '-i', 'http://www.example.com/',
+                '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+            # Test cookies arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            downloader._call_downloader('test', info_dict)
+            self.assertEqual(self._args, [
+                'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n',
+                '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 08c98b336..afb4ee33d 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -13,9 +13,7 @@ from ..utils import (
     error_to_compat_str,
     format_bytes,
     shell_quote,
-    T,
     timeconvert,
-    traverse_obj,
 )
 
 
@@ -379,9 +377,6 @@ class FileDownloader(object):
                     else '%.2f' % sleep_interval))
             time.sleep(sleep_interval)
 
-        info_dict['http_headers'] = dict(traverse_obj(info_dict, (
-            'http_headers', T(dict.items), lambda _, pair: pair[0].lower() != 'cookie'))) or None
-
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 1b6bd1fa2..7fc864e85 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -1,9 +1,10 @@
 from __future__ import unicode_literals
 
-import os.path
+import os
 import re
 import subprocess
 import sys
+import tempfile
 import time
 
 from .common import FileDownloader
@@ -23,6 +24,8 @@ from ..utils import (
     check_executable,
     is_outdated_version,
     process_communicate_or_kill,
+    T,
+    traverse_obj,
 )
 
 
@@ -30,6 +33,7 @@ class ExternalFD(FileDownloader):
     def real_download(self, filename, info_dict):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        self._cookies_tempfile = None
 
         try:
             started = time.time()
@@ -42,6 +46,13 @@ class ExternalFD(FileDownloader):
             # should take place
             retval = 0
             self.to_screen('[%s] Interrupted by user' % self.get_basename())
+        finally:
+            if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
+                try:
+                    os.remove(self._cookies_tempfile)
+                except OSError:
+                    self.report_warning(
+                        'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
 
         if retval == 0:
             status = {
@@ -97,6 +108,16 @@ class ExternalFD(FileDownloader):
     def _configuration_args(self, default=[]):
         return cli_configuration_args(self.params, 'external_downloader_args', default)
 
+    def _write_cookies(self):
+        if not self.ydl.cookiejar.filename:
+            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+            tmp_cookies.close()
+            self._cookies_tempfile = tmp_cookies.name
+            self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
+        # real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
+        self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
+        return self.ydl.cookiejar.filename or self._cookies_tempfile
+
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
         cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -110,13 +131,21 @@ class ExternalFD(FileDownloader):
             self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode
 
+    @staticmethod
+    def _header_items(info_dict):
+        return traverse_obj(
+            info_dict, ('http_headers', T(dict.items), Ellipsis))
+
 
 class CurlFD(ExternalFD):
     AVAILABLE_OPT = '-V'
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '--location', '-o', tmpfilename]
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['--cookie', cookie_header]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
         cmd += self._valueless_option('--silent', 'noprogress')
@@ -151,8 +180,11 @@ class AxelFD(ExternalFD):
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-o', tmpfilename]
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             cmd += ['-H', '%s: %s' % (key, val)]
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -162,8 +194,10 @@ class WgetFD(ExternalFD):
     AVAILABLE_OPT = '--version'
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies', self._write_cookies()]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._option('--limit-rate', 'ratelimit')
         retry = self._option('--tries', 'retries')
@@ -182,21 +216,58 @@ class WgetFD(ExternalFD):
 class Aria2cFD(ExternalFD):
     AVAILABLE_OPT = '-v'
 
+    @staticmethod
+    def _aria2c_filename(fn):
+        return fn if os.path.isabs(fn) else os.path.join('.', fn)
+
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c']
-        cmd += self._configuration_args([
-            '--min-split-size', '1M', '--max-connection-per-server', '4'])
-        dn = os.path.dirname(tmpfilename)
-        if dn:
-            cmd += ['--dir', dn]
-        cmd += ['--out', os.path.basename(tmpfilename)]
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '-c',
+               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
+        if 'fragments' in info_dict:
+            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+        else:
+            cmd += ['--min-split-size', '1M']
+
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies={0}'.format(self._write_cookies())]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._configuration_args(['--max-connection-per-server', '4'])
+        cmd += ['--out', os.path.basename(tmpfilename)]
+        cmd += self._option('--max-overall-download-limit', 'ratelimit')
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
-        cmd += ['--', info_dict['url']]
+        cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
+        cmd += self._configuration_args()
+
+        # aria2c strips out spaces from the beginning/end of filenames and paths.
+        # We work around this issue by adding a "./" to the beginning of the
+        # filename and relative path, and adding a "/" at the end of the path.
+        # See: https://github.com/yt-dlp/yt-dlp/issues/276
+        # https://github.com/ytdl-org/youtube-dl/issues/20312
+        # https://github.com/aria2/aria2/issues/1373
+        dn = os.path.dirname(tmpfilename)
+        if dn:
+            cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
+        if 'fragments' not in info_dict:
+            cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
+        cmd += ['--auto-file-renaming=false']
+        if 'fragments' in info_dict:
+            cmd += ['--file-allocation=none', '--uri-selector=inorder']
+            url_list_file = '%s.frag.urls' % (tmpfilename, )
+            url_list = []
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+                url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
+            stream, _ = self.sanitize_open(url_list_file, 'wb')
+            stream.write('\n'.join(url_list).encode())
+            stream.close()
+            cmd += ['-i', self._aria2c_filename(url_list_file)]
+        else:
+            cmd += ['--', info_dict['url']]
         return cmd
 
 
@@ -235,8 +306,10 @@ class Aria2pFD(ExternalFD):
         }
         options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
         options['out'] = os.path.basename(tmpfilename)
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            options['load-cookies'] = self._write_cookies()
         options['header'] = []
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             options['header'].append('{0}: {1}'.format(key, val))
         download = aria2.add_uris([info_dict['url']], options)
         status = {
@@ -265,8 +338,16 @@ class HttpieFD(ExternalFD):
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             cmd += ['%s:%s' % (key, val)]
+
+        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+        # 2: https://httpie.io/docs/cli/sessions
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['Cookie:%s' % cookie_header]
         return cmd
 
 
@@ -312,7 +393,14 @@ class FFmpegFD(ExternalFD):
         # if end_time:
         #     args += ['-t', compat_str(end_time - start_time)]
 
-        if info_dict['http_headers'] and re.match(r'^https?://', url):
+        cookies = self.ydl.cookiejar.get_cookies_for_url(url)
+        if cookies:
+            args.extend(['-cookies', ''.join(
+                '{0}={1}; path={2}; domain={3};\r\n'.format(
+                    cookie.name, cookie.value, cookie.path, cookie.domain)
+                for cookie in cookies)])
+
+        if info_dict.get('http_headers') and re.match(r'^https?://', url):
             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
             headers = handle_youtubedl_headers(info_dict['http_headers'])

From 1634b1d61efa36c31c86b8c64c88dc297a7af28a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Jul 2023 21:51:32 +0100
Subject: [PATCH 098/156] [doc] Warn against setting cookies with --add-header

---
 youtube_dl/options.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index d802b7e59..434f520d3 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -544,12 +544,14 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--referer',
         metavar='URL', dest='referer', default=None,
-        help='Specify a custom referer, use if the video access is restricted to one domain',
+        help='Specify a custom Referer: use if the video access is restricted to one domain',
     )
     workarounds.add_option(
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', action='append',
-        help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+        help=('Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times. '
+              'NB Use --cookies rather than adding a Cookie header if its contents may be sensitive; '
+              'data from a Cookie header will be sent to all domains, not just the one intended')
     )
     workarounds.add_option(
         '--bidi-workaround',

From 1d8d5a93f7187438587c3a754b53fdf30322cef0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Jul 2023 20:14:50 +0100
Subject: [PATCH 099/156] [test] Fixes for old Pythons

---
 .github/workflows/ci.yml |  4 ++--
 test/helper.py           |  6 ++++++
 test/test_http.py        |  7 ++++++-
 test/test_utils.py       |  6 +++---
 youtube_dl/jsinterp.py   | 14 +++++++-------
 youtube_dl/utils.py      |  8 ++++++--
 6 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ce878c1b1..c3aabde47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -301,7 +301,7 @@ jobs:
       if: ${{ matrix.python-version == '2.6' }}
       shell: bash
       run: |
-        # see pip for Jython
+        # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
         $PIP -qq show unittest2 || { \
           for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
               "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
@@ -312,7 +312,7 @@ jobs:
             $PIP install ${u##*/}; \
           done; }
         # make tests use unittest2
-        for test in ./test/test_*.py; do
+        for test in ./test/test_*.py ./test/helper.py; do
           sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
         done
     #-------- nose --------
diff --git a/test/helper.py b/test/helper.py
index e3314b03e..aa99001b2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -9,6 +9,7 @@ import re
 import types
 import ssl
 import sys
+import unittest
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
@@ -17,6 +18,7 @@ from youtube_dl.compat import (
     compat_str,
 )
 from youtube_dl.utils import (
+    IDENTITY,
     preferredencoding,
     write_string,
 )
@@ -298,3 +300,7 @@ def http_server_port(httpd):
     else:
         sock = httpd.socket
     return sock.getsockname()[1]
+
+
+def expectedFailureIf(cond):
+    return unittest.expectedFailure if cond else IDENTITY
diff --git a/test/test_http.py b/test/test_http.py
index cd180b51f..1a6b2e878 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import contextlib
 import gzip
 import io
 import ssl
@@ -154,7 +155,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 
         def gzip_compress(p):
             buf = io.BytesIO()
-            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+            with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:
                 f.write(p)
             return buf.getvalue()
 
@@ -306,6 +307,10 @@ class TestHTTP(unittest.TestCase):
             else self.https_port if scheme == 'https'
             else self.http_port, path)
 
+    @unittest.skipUnless(
+        sys.version_info >= (3, 2)
+        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 9)),
+        'No support for certificate check in SSL')
     def test_nocheckcertificate(self):
         with FakeYDL({'logger': FakeLogger()}) as ydl:
             with self.assertRaises(compat_urllib_error.URLError):
diff --git a/test/test_utils.py b/test/test_utils.py
index 1fc16ed05..2ee727caf 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1617,7 +1617,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
                               msg='exceptions in the query function should be catched')
         self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
@@ -1643,7 +1643,7 @@ Line 1
             with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
                 traverse_obj(_TEST_DATA, set())
             with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
-                traverse_obj(_TEST_DATA, {str.upper, str})
+                traverse_obj(_TEST_DATA, set((str.upper, str)))
 
         # Test `slice` as a key
         _SLICE_DATA = [0, 1, 2, 3, 4]
@@ -1779,7 +1779,7 @@ Line 1
                          {0: 100}, msg='type as expected_type should filter dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                          {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
-        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
+        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
                          1, msg='expected_type should not filter non final dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                          {0: {0: 100}}, msg='expected_type should transform deep dict values')
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 882432b80..86d902248 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -280,16 +280,16 @@ class JSInterpreter(object):
             # make Py 2.6 conform to its lying documentation
             if name == 'flags':
                 self.flags = self.__flags
+                return self.flags
             elif name == 'pattern':
                 self.pattern = self.__pattern_txt
+                return self.pattern
+            elif hasattr(self.__self, name):
+                v = getattr(self.__self, name)
+                setattr(self, name, v)
+                return v
             elif name in ('groupindex', 'groups'):
-                # in case these get set after a match?
-                if hasattr(self.__self, name):
-                    setattr(self, name, getattr(self.__self, name))
-                else:
-                    return 0 if name == 'groupindex' else {}
-            if hasattr(self, name):
-                return getattr(self, name)
+                return 0 if name == 'groupindex' else {}
             raise AttributeError('{0} has no attribute named {1}'.format(self, name))
 
         @classmethod
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ac6c81465..494f8341b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6198,7 +6198,8 @@ def traverse_obj(obj, *paths, **kwargs):
         elif isinstance(obj, compat_re_Match):
             result = None
             if isinstance(key, int) or casesense:
-                result = lookup_or_none(obj, key, getter=compat_re_Match.group)
+                # Py 2.6 doesn't have methods in the Match class/type
+                result = lookup_or_none(obj, key, getter=lambda _, k: obj.group(k))
 
             elif isinstance(key, str):
                 result = next((v for k, v in obj.groupdict().items()
@@ -6246,7 +6247,10 @@ def traverse_obj(obj, *paths, **kwargs):
 
             if __debug__ and callable(key):
                 # Verify function signature
-                inspect.getcallargs(key, None, None)
+                args = inspect.getargspec(key)
+                if len(args.args) != 2:
+                    # crash differently in 2.6 !
+                    inspect.getcallargs(key, None, None)
 
             new_objs = []
             for obj in objs:

From 47214e46d852e9d7ddf81d69a8e70806e2396c6c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jul 2023 20:39:11 +0100
Subject: [PATCH 100/156] [compat] Fix old Pythons' broken loading of valueless
 cookie attributes

Cookie string parsing in Py 2.6.9 (and probably earlier) requires `=` after
an attribute name. The same applies to 3.2, although the CPython code appears
to be OK; 3.1 was also wrong.
---
 youtube_dl/compat.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index cd11ba5aa..1d784d90f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -126,12 +126,24 @@ except ImportError:  # Python 2
     import Cookie as compat_cookies
 compat_http_cookies = compat_cookies
 
-if sys.version_info[0] == 2:
+if sys.version_info[0] == 2 or sys.version_info < (3, 3):
     class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
         def load(self, rawdata):
-            if isinstance(rawdata, compat_str):
-                rawdata = str(rawdata)
-            return super(compat_cookies_SimpleCookie, self).load(rawdata)
+            must_have_value = 0
+            if not isinstance(rawdata, dict):
+                if sys.version_info[:2] != (2, 7):
+                    # attribute must have value for parsing
+                    rawdata, must_have_value = re.subn(
+                        r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
+                if sys.version_info[0] == 2:
+                    if isinstance(rawdata, compat_str):
+                        rawdata = str(rawdata)
+            super(compat_cookies_SimpleCookie, self).load(rawdata)
+            if must_have_value > 0:
+                for morsel in self.values():
+                    for attr in ('secure', 'httponly'):
+                        if morsel.get(attr):
+                            morsel[attr] = True
 else:
     compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
 compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie

From 825a40744bf9aeb743452db24e43d3eb61feb6c2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 12:40:09 +0100
Subject: [PATCH 101/156] [utils] Align traverse_obj() with yt-dlp

Thanks Grub4k for these:
* traverse `Iterable`s, from https://github.com/yt-dlp/yt-dlp/pull/6902, etc
* traverse `set` key for transformations/filters, `re.Match` group names, from
  https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec, etc
* traverse `re.Match`es, from https://github.com/yt-dlp/yt-dlp/pull/5174
* always return list when branching, from https://github.com/yt-dlp/yt-dlp/pull/5170
---
 test/test_utils.py  | 37 +++++++++++++++++++++----------------
 youtube_dl/utils.py |  9 ++-------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 2ee727caf..1b5d170fe 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -20,7 +20,7 @@ import xml.etree.ElementTree
 from youtube_dl.utils import (
     age_restricted,
     args_to_str,
-    encode_base_n,
+    base_url,
     caesar,
     clean_html,
     clean_podcast_url,
@@ -29,10 +29,12 @@ from youtube_dl.utils import (
     detect_exe_version,
     determine_ext,
     dict_get,
+    encode_base_n,
     encode_compat_str,
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    expand_path,
     extract_attributes,
     ExtractorError,
     find_xpath_attr,
@@ -51,6 +53,7 @@ from youtube_dl.utils import (
     js_to_json,
     LazyList,
     limit_length,
+    lowercase_escape,
     merge_dicts,
     mimetype2ext,
     month_by_name,
@@ -66,17 +69,16 @@ from youtube_dl.utils import (
     parse_resolution,
     parse_bitrate,
     pkcs1pad,
-    read_batch_urls,
-    sanitize_filename,
-    sanitize_path,
-    sanitize_url,
-    expand_path,
     prepend_extension,
-    replace_extension,
+    read_batch_urls,
     remove_start,
     remove_end,
     remove_quotes,
+    replace_extension,
     rot47,
+    sanitize_filename,
+    sanitize_path,
+    sanitize_url,
     shell_quote,
     smuggle_url,
     str_or_none,
@@ -93,10 +95,8 @@ from youtube_dl.utils import (
     unified_timestamp,
     unsmuggle_url,
     uppercase_escape,
-    lowercase_escape,
     url_basename,
     url_or_none,
-    base_url,
     urljoin,
     urlencode_postdata,
     urshift,
@@ -1586,6 +1586,11 @@ Line 1
             'dict': {},
         }
 
+        # define a pukka Iterable
+        def iter_range(stop):
+            for from_ in range(stop):
+                yield from_
+
         # Test base functionality
         self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
                          msg='allow tuple path')
@@ -1602,13 +1607,13 @@ Line 1
         # Test Ellipsis behavior
         self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
                               (item for item in _TEST_DATA.values() if item not in (None, {})),
-                              msg='`...` should give all non discarded values')
+                              msg='`...` should give all non-discarded values')
         self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
                               msg='`...` selection for dicts should select all values')
         self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
                          ['https://www.example.com/0', 'https://www.example.com/1'],
                          msg='nested `...` queries should work')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
                               msg='`...` query result should be flattened')
         self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
                          msg='`...` should accept iterables')
@@ -1618,7 +1623,7 @@ Line 1
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
         self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
-                              msg='exceptions in the query function should be catched')
+                              msg='exceptions in the query function should be caught')
         self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
         if __debug__:
@@ -1706,7 +1711,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
                          msg='remove empty values when dict key')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
-                         msg='use `default` when dict key and `default`')
+                         msg='use `default` when dict key and a default')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
                          msg='remove empty values when nested dict key fails')
         self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
@@ -1768,7 +1773,7 @@ Line 1
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
                          'str', msg='accept matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
-                         None, msg='reject non matching `expected_type` type')
+                         None, msg='reject non-matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
                          '0', msg='transform type using type function')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
@@ -1780,7 +1785,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                          {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
-                         1, msg='expected_type should not filter non final dict values')
+                         1, msg='expected_type should not filter non-final dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                          {0: {0: 100}}, msg='expected_type should transform deep dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
@@ -1838,7 +1843,7 @@ Line 1
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
                                       _traverse_string=True), 'sr',
                          msg='`slice` should result in string if `traverse_string`')
-        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
                                       _traverse_string=True), 'str',
                          msg='function should result in string if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 494f8341b..b77a7fb0e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4268,13 +4268,8 @@ def variadic(x, allowed_types=NO_DEFAULT):
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
-    if isinstance(key_or_keys, (list, tuple)):
-        for key in key_or_keys:
-            if key not in d or d[key] is None or skip_false_values and not d[key]:
-                continue
-            return d[key]
-        return default
-    return d.get(key_or_keys, default)
+    exp = (lambda x: x or None) if skip_false_values else IDENTITY
+    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp, default=default)
 
 
 def try_call(*funcs, **kwargs):

From d9d07a95815a992bf5f876a62f25c831eb3f32ac Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 12:06:34 +0100
Subject: [PATCH 102/156] [utils] Improve js_to_json, align with yt-dlp

* support variable substitution, from
  https://github.com/yt-dlp/yt-dlp/pull/#521 etc,
  thanks ChillingPepper, Grub4k, pukkandan
* improve escape handling, from
  https://github.com/yt-dlp/yt-dlp/pull/#521
  thanks Grub4k
* support template strings from
  https://github.com/yt-dlp/yt-dlp/pull/6623
  thanks Grub4k
* add limited `!` evaluation (eg, !!0 -> false, see tests)

---
 test/test_utils.py  | 103 ++++++++++++++++++++++++++++++++++++++--
 youtube_dl/utils.py | 112 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 186 insertions(+), 29 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 1b5d170fe..e83977f29 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -905,6 +905,85 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
+    def test_js_to_json_vars_strings(self):
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'null': a,
+                    'nullStr': b,
+                    'true': c,
+                    'trueStr': d,
+                    'false': e,
+                    'falseStr': f,
+                    'unresolvedVar': g,
+                }''',
+                {
+                    'a': 'null',
+                    'b': '"null"',
+                    'c': 'true',
+                    'd': '"true"',
+                    'e': 'false',
+                    'f': '"false"',
+                    'g': 'var',
+                }
+            )),
+            {
+                'null': None,
+                'nullStr': 'null',
+                'true': True,
+                'trueStr': 'true',
+                'false': False,
+                'falseStr': 'false',
+                'unresolvedVar': 'var'
+            }
+        )
+
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'int': a,
+                    'intStr': b,
+                    'float': c,
+                    'floatStr': d,
+                }''',
+                {
+                    'a': '123',
+                    'b': '"123"',
+                    'c': '1.23',
+                    'd': '"1.23"',
+                }
+            )),
+            {
+                'int': 123,
+                'intStr': '123',
+                'float': 1.23,
+                'floatStr': '1.23',
+            }
+        )
+
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'object': a,
+                    'objectStr': b,
+                    'array': c,
+                    'arrayStr': d,
+                }''',
+                {
+                    'a': '{}',
+                    'b': '"{}"',
+                    'c': '[]',
+                    'd': '"[]"',
+                }
+            )),
+            {
+                'object': {},
+                'objectStr': '{}',
+                'array': [],
+                'arrayStr': '[]',
+            }
+        )
+
     def test_js_to_json_realworld(self):
         inp = '''{
             'clip':{'provider':'pseudo'}
@@ -975,10 +1054,10 @@ class TestUtil(unittest.TestCase):
             !42: 42
         }''')
         self.assertEqual(json.loads(on), {
-            'a': 0,
-            'b': 1,
-            'c': 0,
-            'd': 42.42,
+            'a': True,
+            'b': False,
+            'c': False,
+            'd': True,
             'e': [],
             'f': "abc",
             'g': "",
@@ -1048,10 +1127,26 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{ "040": "040" }')
         self.assertEqual(json.loads(on), {'040': '040'})
 
+        on = js_to_json('[1,//{},\n2]')
+        self.assertEqual(json.loads(on), [1, 2])
+
+        on = js_to_json(r'"\^\$\#"')
+        self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+        on = js_to_json('\'"\\""\'')
+        self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
 
+    def test_js_to_json_template_literal(self):
+        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+        self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b77a7fb0e..b05f65283 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4365,46 +4365,108 @@ def strip_jsonp(code):
         r'\g<callback_data>', code)
 
 
-def js_to_json(code):
-    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+def js_to_json(code, *args, **kwargs):
+
+    # vars is a dict of (var, val) pairs to substitute
+    vars = args[0] if len(args) > 0 else kwargs.get('vars', {})
+    strict = kwargs.get('strict', False)
+
+    STRING_QUOTES = '\'"`'
+    STRING_RE = '|'.join(r'{0}(?:\\.|[^\\{0}])*{0}'.format(q) for q in STRING_QUOTES)
+    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
     INTEGER_TABLE = (
         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+        (r'(?s)^(\d+){skip}:?$'.format(skip=SKIP_RE), 10),
     )
+    # compat candidate
+    JSONDecodeError = json.JSONDecodeError if 'JSONDecodeError' in dir(json) else ValueError
+
+    def process_escape(match):
+        JSON_PASSTHROUGH_ESCAPES = r'"\bfnrtu'
+        escape = match.group(1) or match.group(2)
+
+        return ('\\' + escape if escape in JSON_PASSTHROUGH_ESCAPES
+                else '\\u00' if escape == 'x'
+                else '' if escape == '\n'
+                else escape)
+
+    def template_substitute(match):
+        evaluated = js_to_json(match.group(1), vars, strict=strict)
+        if evaluated[0] == '"':
+            return json.loads(evaluated)
+        return evaluated
 
     def fix_kv(m):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
             return v
-        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
-            return ""
+        elif v in ('undefined', 'void 0'):
+            return 'null'
+        elif v.startswith('/*') or v.startswith('//') or v == ',':
+            return ''
 
-        if v[0] in ("'", '"'):
-            v = re.sub(r'(?s)\\.|"', lambda m: {
-                '"': '\\"',
-                "\\'": "'",
-                '\\\n': '',
-                '\\x': '\\u00',
-            }.get(m.group(0), m.group(0)), v[1:-1])
-        else:
-            for regex, base in INTEGER_TABLE:
-                im = re.match(regex, v)
-                if im:
-                    i = int(im.group(1), base)
-                    return '"%d":' % i if v.endswith(':') else '%d' % i
+        if v[0] in STRING_QUOTES:
+            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
+            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
+            return '"{0}"'.format(escaped)
 
-        return '"%s"' % v
+        inv = IDENTITY
+        im = re.split(r'^!+', v)
+        if len(im) > 1 and not im[-1].endswith(':'):
+            if (len(v) - len(im[1])) % 2 == 1:
+                inv = lambda x: 'true' if x == 0 else 'false'
+            else:
+                inv = lambda x: 'false' if x == 0 else 'true'
+        if not any(x for x in im):
+            return
+        v = im[-1]
+
+        for regex, base in INTEGER_TABLE:
+            im = re.match(regex, v)
+            if im:
+                i = int(im.group(1), base)
+                return ('"%s":' if v.endswith(':') else '%s') % inv(i)
+
+        if v in vars:
+            try:
+                if not strict:
+                    json.loads(vars[v])
+            except JSONDecodeError:
+                return inv(json.dumps(vars[v]))
+            else:
+                return inv(vars[v])
+
+        if not strict:
+            v = try_call(inv, args=(v,), default=v)
+            if v in ('true', 'false'):
+                return v
+            return '"{0}"'.format(v)
+
+        raise ValueError('Unknown value: ' + v)
+
+    def create_map(mobj):
+        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
+    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
+    if not strict:
+        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
+        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
+        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
 
     return re.sub(r'''(?sx)
-        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
-        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
-        {comment}|,(?={skip}[\]}}])|
-        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
-        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
-        [0-9]+(?={skip}:)|
+        {str_}|
+        {comment}|
+        ,(?={skip}[\]}}])|
+        void\s0|
+        !*(?:(?<!\d)[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
+        (?:\b|!+)0(?:[xX][\da-fA-F]+|[0-7]+)(?:{skip}:)?|
+        !+\d+(?:\.\d*)?(?:{skip}:)?|
+        [0-9]+(?:{skip}:)|
         !+
-        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
+        '''.format(comment=COMMENT_RE, skip=SKIP_RE, str_=STRING_RE), fix_kv, code)
 
 
 def qualities(quality_ids):

From cb9366eda584fde2421140adf994eadc5bb6b943 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:54:52 +0100
Subject: [PATCH 103/156] [utils] Minor updates (merge_dicts, T)

A couple of mods to ease yt-dlp back-ports:
* add kwargs to merge_dicts:
  `unblank=True` (disallow empty string), `rev=False` (reverse the merge list)
* add `T(x)` shortcut for `{x}`, unsupported in Py2.6
---
 youtube_dl/utils.py | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b05f65283..0cbbec0f3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4269,7 +4269,8 @@ def variadic(x, allowed_types=NO_DEFAULT):
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     exp = (lambda x: x or None) if skip_false_values else IDENTITY
-    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp, default=default)
+    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp,
+                        default=default, get_all=False)
 
 
 def try_call(*funcs, **kwargs):
@@ -4302,16 +4303,38 @@ def try_get(src, getter, expected_type=None):
                 return v
 
 
-def merge_dicts(*dicts):
+def merge_dicts(*dicts, **kwargs):
+    """
+        Merge the `dict`s in `dicts` using the first valid value for each key.
+        Normally valid: not None and not an empty string
+
+        Keyword-only args:
+        unblank:    allow empty string if False (default True)
+        rev:        merge dicts in reverse order (default False)
+
+        merge_dicts(dct1, dct2, ..., unblank=False, rev=True)
+        matches {**dct1, **dct2, ...}
+
+        However, merge_dicts(dct1, dct2, ..., rev=True) may often be better.
+    """
+
+    unblank = kwargs.get('unblank', True)
+    rev = kwargs.get('rev', False)
+
+    if unblank:
+        def can_merge_str(k, v, to_dict):
+            return (isinstance(v, compat_str) and v
+                    and isinstance(to_dict[k], compat_str)
+                    and not to_dict[k])
+    else:
+        can_merge_str = lambda k, v, to_dict: False
+
     merged = {}
-    for a_dict in dicts:
+    for a_dict in reversed(dicts) if rev else dicts:
         for k, v in a_dict.items():
             if v is None:
                 continue
-            if (k not in merged
-                    or (isinstance(v, compat_str) and v
-                        and isinstance(merged[k], compat_str)
-                        and not merged[k])):
+            if (k not in merged) or can_merge_str(k, v, merged):
                 merged[k] = v
     return merged
 

From 1e8ccdd2eb77901e18feb8a9d48e62d11651cd1e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 13:08:58 +0100
Subject: [PATCH 104/156] [InfoExtractor] Support groups in `_search_regex()`,
 etc

---
 youtube_dl/extractor/common.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7244e5df6..dbdf456f5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1005,6 +1005,8 @@ class InfoExtractor(object):
             if group is None:
                 # return the first matching group
                 return next(g for g in mobj.groups() if g is not None)
+            elif isinstance(group, (list, tuple)):
+                return tuple(mobj.group(g) for g in group)
             else:
                 return mobj.group(group)
         elif default is not NO_DEFAULT:
@@ -1020,10 +1022,9 @@ class InfoExtractor(object):
         Like _search_regex, but strips HTML tags and unescapes entities.
         """
         res = self._search_regex(pattern, string, name, default, fatal, flags, group)
-        if res:
-            return clean_html(res).strip()
-        else:
-            return res
+        if isinstance(res, tuple):
+            return tuple(map(clean_html, res))
+        return clean_html(res)
 
     def _get_netrc_login_info(self, netrc_machine=None):
         username = None

From 4566e6e53ebd87c6c548a8414ab5bd742c14c2b0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:02:25 +0100
Subject: [PATCH 105/156] [GlobalPlayer] Add site extractors back-ported from
 yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6903, thanks garret1317
---
 youtube_dl/extractor/extractors.py   |  15 +-
 youtube_dl/extractor/globalplayer.py | 285 +++++++++++++++++++++++++++
 2 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 youtube_dl/extractor/globalplayer.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3a87f9e33..811a2605a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -444,6 +444,13 @@ from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
 from .giga import GigaIE
 from .glide import GlideIE
+from .globalplayer import (
+    GlobalPlayerLiveIE,
+    GlobalPlayerLivePlaylistIE,
+    GlobalPlayerAudioIE,
+    GlobalPlayerAudioEpisodeIE,
+    GlobalPlayerVideoIE
+)
 from .globo import (
     GloboIE,
     GloboArticleIE,
@@ -975,6 +982,10 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)
 from .puhutv import (
     PuhuTVIE,
     PuhuTVSerieIE,
@@ -1678,7 +1689,3 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
-from .pr0gramm import (
-    Pr0grammIE,
-    Pr0grammStaticIE,
-)
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
new file mode 100644
index 000000000..cceab9e6a
--- /dev/null
+++ b/youtube_dl/extractor/globalplayer.py
@@ -0,0 +1,285 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    join_nonempty,
+    merge_dicts,
+    parse_duration,
+    str_or_none,
+    T,
+    traverse_obj,
+    unified_strdate,
+    unified_timestamp,
+    urlhandle_detect_ext,
+)
+
+
+class GlobalPlayerBaseIE(InfoExtractor):
+
+    import re
+
+    @classmethod
+    def _match_valid_url(cls, url):
+        return cls.re.match(cls._VALID_URL, url)
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
+    def _get_page_props(self, url, video_id):
+        webpage = self._download_webpage(url, video_id)
+        return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
+
+    def _request_ext(self, url, video_id):
+        return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
+            url, video_id, note='Determining source extension'))
+
+    def _extract_audio(self, episode, series):
+
+        def clean_desc(x):
+            x = clean_html(x)
+            if x:
+                x = x.replace('\xa0', ' ')
+            return x
+
+        return merge_dicts({
+            'vcodec': 'none',
+        }, traverse_obj(series, {
+            'series': 'title',
+            'series_id': 'id',
+            'thumbnail': 'imageUrl',
+            'uploader': 'itunesAuthor',  # podcasts only
+        }), traverse_obj(episode, {
+            'id': 'id',
+            'description': ('description', T(clean_desc)),
+            'duration': ('duration', T(parse_duration)),
+            'thumbnail': 'imageUrl',
+            'url': 'streamUrl',
+            'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
+            'title': 'title',
+        }, get_all=False), rev=True)
+
+
+class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
+        'info_dict': {
+            'id': '2mx1E',
+            'ext': 'aac',
+            'display_id': 'smoothchill-uk',
+            'title': 're:^Smooth Chill.+$',
+            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
+            'description': 'Music To Chill To',
+            # 'live_status': 'is_live',
+            'is_live': True,
+        },
+    }, {
+        # national station
+        'url': 'https://www.globalplayer.com/live/heart/uk/',
+        'info_dict': {
+            'id': '2mwx4',
+            'ext': 'aac',
+            'description': 'turn up the feel good!',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'title': 're:^Heart UK.+$',
+            'display_id': 'heart-uk',
+        },
+    }, {
+        # regional variation
+        'url': 'https://www.globalplayer.com/live/heart/london/',
+        'info_dict': {
+            'id': 'AMqg',
+            'ext': 'aac',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'title': 're:^Heart London.+$',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'display_id': 'heart-london',
+            'description': 'turn up the feel good!',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['station']
+        stream_url = station['streamUrl']
+
+        return merge_dicts({
+            'id': station['id'],
+            'display_id': (
+                join_nonempty('brandSlug', 'slug', from_dict=station)
+                or station.get('legacyStationPrefix')),
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+        }, {
+            'title': self._live_title(traverse_obj(
+                station, (('name', 'brandName'), T(str_or_none)),
+                get_all=False)),
+        }, traverse_obj(station, {
+            'description': 'tagline',
+            'thumbnail': 'brandLogo',
+        }), rev=True)
+
+
+class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
+    _TESTS = [{
+        # "live playlist"
+        'url': 'https://www.globalplayer.com/playlists/8bLk/',
+        'info_dict': {
+            'id': '8bLk',
+            'ext': 'aac',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
+            'title': 're:^Classic FM Hall of Fame.+$'
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['playlistData']
+        stream_url = station['streamUrl']
+
+        return merge_dicts({
+            'id': video_id,
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+        }, traverse_obj(station, {
+            'title': 'title',
+            'description': 'description',
+            'thumbnail': 'image',
+        }), rev=True)
+
+
+class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': '42KuaM',
+            'title': 'Filthy Ritual',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'categories': ['Society & Culture', 'True Crime'],
+            'uploader': 'Global',
+            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        series = props['podcastInfo'] if podcast else props['catchupInfo']
+
+        return merge_dicts({
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
+                        series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
+            'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
+        }, traverse_obj(series, {
+            'description': 'description',
+            'thumbnail': 'imageUrl',
+            'title': 'title',
+            'uploader': 'itunesAuthor',  # podcasts only
+        }), rev=True)
+
+
+class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
+        'info_dict': {
+            'id': '7DrfNnE',
+            'ext': 'mp3',
+            'title': 'Filthy Ritual - Trailer',
+            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'duration': 225.0,
+            'timestamp': 1681254900,
+            'series': 'Filthy Ritual',
+            'series_id': '42KuaM',
+            'upload_date': '20230411',
+            'uploader': 'Global',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
+        'only_matching': True,
+        # expired: refresh the details with a current show for a full test
+        'info_dict': {
+            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
+            'ext': 'm4a',
+            'timestamp': 1682056800,
+            'series': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+            'upload_date': '20230421',
+            'series_id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'duration': 10800.0,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
+
+        return self._extract_audio(
+            episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
+
+
+class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
+        'info_dict': {
+            'id': '2JsSZ7Gm2uP',
+            'ext': 'mp4',
+            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
+            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
+            'upload_date': '20230420',
+            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._get_page_props(url, video_id)['videoData']
+
+        return merge_dicts({
+            'id': video_id,
+        }, traverse_obj(meta, {
+            'url': 'url',
+            'thumbnail': ('image', 'url'),
+            'title': 'title',
+            'upload_date': ('publish_date', T(unified_strdate)),
+            'description': 'description',
+        }), rev=True)

From eaaf4c6736b98e20a1923162ae05952c8cb51ee1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:04:46 +0100
Subject: [PATCH 106/156] [Whyp] Add extractor back-ported from yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6803, thanks CoryTibbettsDev
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/whyp.py       | 78 ++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 youtube_dl/extractor/whyp.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 811a2605a..9f247dbbf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1576,6 +1576,7 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .whyp import WhypIE
 from .wistia import (
     WistiaIE,
     WistiaPlaylistIE,
diff --git a/youtube_dl/extractor/whyp.py b/youtube_dl/extractor/whyp.py
new file mode 100644
index 000000000..16f9154ad
--- /dev/null
+++ b/youtube_dl/extractor/whyp.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    merge_dicts,
+    str_or_none,
+    T,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class WhypIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
+        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
+        'info_dict': {
+            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
+            'id': '18337',
+            'title': 'Home Page Example Track',
+            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'ext': 'mp3',
+            'duration': 52.82,
+            'uploader': 'Brad',
+            'uploader_id': '1',
+            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
+        },
+    }, {
+        'url': 'https://www.whyp.it/tracks/18337',
+        'only_matching': True,
+    }]
+
+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', fatal=True, traverse=('data', 0)):
+        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
+
+        import functools
+        import json
+        import re
+        from ..utils import (js_to_json, NO_DEFAULT)
+
+        re_ctx = re.escape(context_name)
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        js, arg_keys, arg_vals = self._search_regex(
+            (p.format(re_ctx, FUNCTION_RE) for p in (r'<script>\s*window\.{0}={1}\s*\)\s*;?\s*</script>', r'{0}\(.*?{1}')),
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}
+
+        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
+            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
+
+        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+        return traverse_obj(ret, traverse) or {}
+
+    def _real_extract(self, url):
+        unique_id = self._match_id(url)
+        webpage = self._download_webpage(url, unique_id)
+        data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
+
+        return merge_dicts({
+            'url': data['audio_url'],
+            'id': unique_id,
+        }, traverse_obj(data, {
+            'title': 'title',
+            'description': 'description',
+            'duration': ('duration', T(float_or_none)),
+            'uploader': ('user', 'username'),
+            'uploader_id': ('user', 'id', T(str_or_none)),
+            'thumbnail': ('artwork_url', T(url_or_none)),
+        }), {
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'http_headers': {'Referer': 'https://whyp.it/'},
+        }, rev=True)

From 4339910df3fe97a054069cb98da594dd1b50c13a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:07:35 +0100
Subject: [PATCH 107/156] [DLF] Add site extractors back-ported from yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6697, thanks nick-cd
---
 youtube_dl/extractor/dlf.py        | 204 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   4 +
 2 files changed, 208 insertions(+)
 create mode 100644 youtube_dl/extractor/dlf.py

diff --git a/youtube_dl/extractor/dlf.py b/youtube_dl/extractor/dlf.py
new file mode 100644
index 000000000..cc3de4582
--- /dev/null
+++ b/youtube_dl/extractor/dlf.py
@@ -0,0 +1,204 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    int_or_none,
+    merge_dicts,
+    traverse_obj,
+    url_or_none,
+    variadic,
+)
+
+
+class DLFBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
+    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
+
+    def _parse_button_attrs(self, button, audio_id=None):
+        attrs = extract_attributes(button)
+        audio_id = audio_id or attrs['data-audio-diraid']
+
+        url = traverse_obj(
+            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
+            'data-audio-src', expected_type=url_or_none)
+        ext = determine_ext(url)
+        formats = (self._extract_m3u8_formats(url, audio_id, fatal=False)
+                   if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
+        self._sort_formats(formats)
+
+        def traverse_attrs(path):
+            path = list(variadic(path))
+            t = path.pop() if callable(path[-1]) else None
+            return traverse_obj(attrs, path, expected_type=t, get_all=False)
+
+        def txt_or_none(v, default=None):
+            return default if v is None else (compat_str(v).strip() or default)
+
+        return merge_dicts(*reversed([{
+            'id': audio_id,
+            # 'extractor_key': DLFIE.ie_key(),
+            # 'extractor': DLFIE.IE_NAME,
+            'formats': formats,
+        }, dict((k, traverse_attrs(v)) for k, v in {
+            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none),
+            'duration': (('data-audioduration', 'data-audio-duration'), int_or_none),
+            'thumbnail': ('data-audioimage', url_or_none),
+            'uploader': 'data-audio-producer',
+            'series': 'data-audio-series',
+            'channel': 'data-audio-origin-site-name',
+            'webpage_url': ('data-audio-download-tracking-path', url_or_none),
+        }.items())]))
+
+
+class DLFIE(DLFBaseIE):
+    IE_NAME = 'dlf'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
+    _TESTS = [
+        # Audio as an HLS stream
+        {
+            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
+            'info_dict': {
+                'id': '03a3eb19',
+                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
+                'ext': 'm4a',
+                'duration': 3298,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'On Stage',
+                'channel': 'deutschlandfunk'
+            },
+            'params': {
+                'skip_download': 'm3u8'
+            },
+            'skip': 'This webpage no longer exists'
+        }, {
+            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
+            'info_dict': {
+                'id': 'd9cc1856',
+                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
+                'ext': 'mp3',
+                'duration': 291,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'Kommentare und Themen der Woche',
+                'channel': 'deutschlandfunk'
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        webpage = self._download_webpage(url, audio_id)
+
+        return self._parse_button_attrs(
+            self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
+
+
+class DLFCorpusIE(DLFBaseIE):
+    IE_NAME = 'dlf:corpus'
+    IE_DESC = 'DLF Multi-feed Archives'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
+    _TESTS = [
+        # Recorded news broadcast with referrals to related broadcasts
+        {
+            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
+            'info_dict': {
+                'id': 'fechten-russland-belarus-ukraine-protest-100',
+                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
+            },
+            'playlist_mincount': 5,
+            'playlist': [{
+                'info_dict': {
+                    'id': '1fc5d64a',
+                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                    'ext': 'mp3',
+                    'duration': 252,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '2ada145f',
+                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
+                    'ext': 'mp3',
+                    'duration': 336,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Deutschlandfunk Nova',
+                    'channel': 'deutschlandfunk-nova'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '47e1a096',
+                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
+                    'ext': 'mp3',
+                    'duration': 602,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }]
+        },
+        # Podcast feed with tag buttons, playlist count fluctuates
+        {
+            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
+            'info_dict': {
+                'id': 'kommentare-und-themen-der-woche-100',
+                'title': 'Meinung - Kommentare und Themen der Woche',
+                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
+            },
+            'playlist_mincount': 10,
+        },
+        # Podcast feed with no description
+        {
+            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
+            'info_dict': {
+                'id': 'podcast-tolle-idee-100',
+                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
+            },
+            'playlist_mincount': 11,
+        },
+    ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return self.playlist_result(
+            map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
+            playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+            self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage, default=None))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9f247dbbf..be73c0665 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -295,6 +295,10 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .democracynow import DemocracynowIE
+from .dlf import (
+    DLFCorpusIE,
+    DLFIE,
+)
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE

From 846522204104e3078c597fa1872465024a684ad6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 4 May 2023 00:08:26 +0100
Subject: [PATCH 108/156] [Clipchamp] Add new extractor back-ported from yt-dlp

---
 youtube_dl/extractor/clipchamp.py  | 76 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 77 insertions(+)
 create mode 100644 youtube_dl/extractor/clipchamp.py

diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py
new file mode 100644
index 000000000..5a732e808
--- /dev/null
+++ b/youtube_dl/extractor/clipchamp.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    merge_dicts,
+    T,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class ClipchampIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
+        'info_dict': {
+            'id': 'gRXZ4ZhdDaU',
+            'ext': 'mp4',
+            'title': 'Untitled video',
+            'uploader': 'Alexander Schwartz',
+            'timestamp': 1680805580,
+            'upload_date': '20230406',
+            'thumbnail': r're:^https?://.+\.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+            'format': 'bestvideo',
+        },
+    }]
+
+    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
+    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+
+        storage_location = data.get('storage_location')
+        if storage_location != 'cf_stream':
+            raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,))
+
+        path = data['download_url']
+        iframe = self._download_webpage(
+            'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe')
+        subdomain = self._search_regex(
+            r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe,
+            'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
+
+        formats = self._extract_mpd_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
+            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
+        formats.extend(self._extract_m3u8_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
+            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
+
+        return merge_dicts({
+            'id': video_id,
+            'formats': formats,
+            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None,
+        }, traverse_obj(data, {
+            'title': ('project', 'project_name', T(compat_str)),
+            'timestamp': ('created_at', T(unified_timestamp)),
+            'thumbnail': ('thumbnail_url', T(url_or_none)),
+        }), rev=True)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index be73c0665..42b009ef5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -226,6 +226,7 @@ from .ciscolive import (
     CiscoLiveSearchIE,
 )
 from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE
 from .cliprs import ClipRsIE

From b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 5 May 2023 19:25:42 +0100
Subject: [PATCH 109/156] [InfoExtractor] Add search methods for Next/Nuxt.js
 from yt-dlp

* add _search_nextjs_data(), from https://github.com/yt-dlp/yt-dlp/pull/1386,
  thanks selfisekai
* add _search_nuxt_data(), from https://github.com/yt-dlp/yt-dlp/pull/1921,
  thanks Lesmiscore, pukkandan
* add tests for the above
* also fix HTML5 type recognition and tests, from
  https://github.com/yt-dlp/yt-dlp/commit/222a230871fe4fe63f35c49590379c9a77116819,
  thanks Lesmiscore
* update extractors in PR using above, fix tests.

---
 test/test_InfoExtractor.py           | 111 +++++++++++++++++++++++++--
 youtube_dl/extractor/clipchamp.py    |   7 --
 youtube_dl/extractor/common.py       |  51 +++++++++++-
 youtube_dl/extractor/globalplayer.py |  32 ++++----
 youtube_dl/extractor/whyp.py         |  25 +-----
 5 files changed, 168 insertions(+), 58 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 6d25441db..34773fbd0 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -7,15 +7,33 @@ import io
 import os
 import sys
 import unittest
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from youtube_dl.compat import compat_etree_fromstring, compat_http_server
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import YoutubeIE, get_info_extractor
-from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
 import threading
 
+from test.helper import (
+    expect_dict,
+    expect_value,
+    FakeYDL,
+    http_server_port,
+)
+from youtube_dl.compat import (
+    compat_etree_fromstring,
+    compat_http_server,
+)
+from youtube_dl.extractor.common import InfoExtractor
+from youtube_dl.extractor import (
+    get_info_extractor,
+    YoutubeIE,
+)
+from youtube_dl.utils import (
+    encode_data_uri,
+    ExtractorError,
+    RegexNotFoundError,
+    strip_jsonp,
+)
+
 
 TEAPOT_RESPONSE_STATUS = 418
 TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
@@ -100,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
 
+    def test_search_nextjs_data(self):
+        html = '''
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="content-type" content=
+  "text/html; charset=utf-8">
+  <meta name="viewport" content="width=device-width">
+  <title>Test _search_nextjs_data()</title>
+</head>
+<body>
+  <div id="__next">
+    <div style="background-color:#17171E" class="FU" dir="ltr">
+      <div class="sc-93de261d-0 dyzzYE">
+        <div>
+          <header class="HD"></header>
+          <main class="MN">
+            <div style="height:0" class="HT0">
+              <div style="width:NaN%" data-testid=
+              "stream-container" class="WDN"></div>
+            </div>
+          </main>
+        </div>
+        <footer class="sc-6e5faf91-0 dEGaHS"></footer>
+      </div>
+    </div>
+  </div>
+  <script id="__NEXT_DATA__" type="application/json">
+  {"props":{"pageProps":{"video":{"id":"testid"}}}}
+  </script>
+</body>
+</html>
+'''
+        search = self.ie._search_nextjs_data(html, 'testID')
+        self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
+
+    def test_search_nuxt_data(self):
+        html = '''
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="content-type" content=
+  "text/html; charset=utf-8">
+  <title>Nuxt.js Test Page</title>
+  <meta name="viewport" content=
+  "width=device-width, initial-scale=1">
+  <meta data-hid="robots" name="robots" content="all">
+</head>
+<body class="BD">
+  <div id="__layout">
+    <h1 class="H1">Example heading</h1>
+    <div class="IN">
+      <p>Decoy text</p>
+    </div>
+  </div>
+  <script>
+  window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
+  </script>
+  <script src="/_nuxt/a12345b.js" defer="defer"></script>
+</body>
+</html>
+'''
+        search = self.ie._search_nuxt_data(html, 'testID')
+        self.assertEqual(search['track']['id'], 'testid')
+
     def test_search_json_ld_realworld(self):
         # https://github.com/ytdl-org/youtube-dl/issues/23306
         expect_dict(
@@ -348,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase):
                 }],
             })
 
+        # from https://0000.studio/
+        # with type attribute but without extension in URL
+        expect_dict(
+            self,
+            self.ie._parse_html5_media_entries(
+                'https://0000.studio',
+                r'''
+                <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+                    controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+                </video>
+                ''', None)[0],
+            {
+                'formats': [{
+                    'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+                    'ext': 'mp4',
+                }],
+            })
+
     def test_extract_jwplayer_data_realworld(self):
         # from http://www.suffolk.edu/sjc/
         expect_dict(
diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py
index 5a732e808..3b485eaab 100644
--- a/youtube_dl/extractor/clipchamp.py
+++ b/youtube_dl/extractor/clipchamp.py
@@ -35,13 +35,6 @@ class ClipchampIE(InfoExtractor):
     _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
     _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
 
-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', **kw),
-            video_id, **kw)
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dbdf456f5..549781186 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import base64
 import datetime
+import functools
 import hashlib
 import json
 import netrc
@@ -23,6 +24,7 @@ from ..compat import (
     compat_getpass,
     compat_integer_types,
     compat_http_client,
+    compat_map as map,
     compat_os_name,
     compat_str,
     compat_urllib_error,
@@ -31,6 +33,7 @@ from ..compat import (
     compat_urllib_request,
     compat_urlparse,
     compat_xml_parse_error,
+    compat_zip as zip,
 )
 from ..downloader.f4m import (
     get_base_url,
@@ -70,6 +73,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    traverse_obj,
     try_get,
     unescapeHTML,
     unified_strdate,
@@ -1349,6 +1353,44 @@ class InfoExtractor(object):
                     break
         return dict((k, v) for k, v in info.items() if v is not None)
 
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
+        kw.pop('transform_source', None)
+        next_data = self._search_regex(
+            r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
+            webpage, 'next.js data', group='nd', **kw)
+        if not next_data:
+            return {}
+        return self._parse_json(next_data, video_id, **nkw)
+
+    def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
+        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
+
+        # self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)
+        context_name = args[0] if len(args) > 0 else kwargs.get('context_name', '__NUXT__')
+        fatal = kwargs.get('fatal', True)
+        traverse = kwargs.get('traverse', ('data', 0))
+
+        re_ctx = re.escape(context_name)
+
+        FUNCTION_RE = (r'\(\s*function\s*\((?P<arg_keys>[\s\S]*?)\)\s*\{\s*'
+                       r'return\s+(?P<js>\{[\s\S]*?})\s*;?\s*}\s*\((?P<arg_vals>[\s\S]*?)\)')
+
+        js, arg_keys, arg_vals = self._search_regex(
+            (p.format(re_ctx, FUNCTION_RE) for p in
+             (r'<script>\s*window\s*\.\s*{0}\s*=\s*{1}\s*\)\s*;?\s*</script>',
+              r'{0}\s*\([\s\S]*?{1}')),
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}
+
+        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
+            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
+
+        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+        return traverse_obj(ret, traverse) or {}
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
@@ -2496,7 +2538,8 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type, type_info={}):
+        def _media_formats(src, cur_media_type, type_info=None):
+            type_info = type_info or {}
             full_url = absolute_url(src)
             ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
@@ -2514,6 +2557,7 @@ class InfoExtractor(object):
                 formats = [{
                     'url': full_url,
                     'vcodec': 'none' if cur_media_type == 'audio' else None,
+                    'ext': ext,
                 }]
             return is_plain_url, formats
 
@@ -2522,7 +2566,7 @@ class InfoExtractor(object):
         # so we wll include them right here (see
         # https://www.ampproject.org/docs/reference/components/amp-video)
         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
-        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
+        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video(?:-js)?|audio)'
         media_tags = [(media_tag, media_tag_name, media_type, '')
                       for media_tag, media_tag_name, media_type
                       in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
@@ -2540,7 +2584,8 @@ class InfoExtractor(object):
             media_attributes = extract_attributes(media_tag)
             src = strip_or_none(media_attributes.get('src'))
             if src:
-                _, formats = _media_formats(src, media_type)
+                f = parse_content_type(media_attributes.get('type'))
+                _, formats = _media_formats(src, media_type, f)
                 media_info['formats'].extend(formats)
             media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
             if media_content:
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
index cceab9e6a..db490b141 100644
--- a/youtube_dl/extractor/globalplayer.py
+++ b/youtube_dl/extractor/globalplayer.py
@@ -24,13 +24,6 @@ class GlobalPlayerBaseIE(InfoExtractor):
     def _match_valid_url(cls, url):
         return cls.re.match(cls._VALID_URL, url)
 
-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', **kw),
-            video_id, **kw)
-
     def _get_page_props(self, url, video_id):
         webpage = self._download_webpage(url, video_id)
         return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
@@ -39,13 +32,14 @@ class GlobalPlayerBaseIE(InfoExtractor):
         return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
             url, video_id, note='Determining source extension'))
 
-    def _extract_audio(self, episode, series):
+    @staticmethod
+    def _clean_desc(x):
+        x = clean_html(x)
+        if x:
+            x = x.replace('\xa0', ' ')
+        return x
 
-        def clean_desc(x):
-            x = clean_html(x)
-            if x:
-                x = x.replace('\xa0', ' ')
-            return x
+    def _extract_audio(self, episode, series):
 
         return merge_dicts({
             'vcodec': 'none',
@@ -56,7 +50,7 @@ class GlobalPlayerBaseIE(InfoExtractor):
             'uploader': 'itunesAuthor',  # podcasts only
         }), traverse_obj(episode, {
             'id': 'id',
-            'description': ('description', T(clean_desc)),
+            'description': ('description', T(self._clean_desc)),
             'duration': ('duration', T(parse_duration)),
             'thumbnail': 'imageUrl',
             'url': 'streamUrl',
@@ -141,9 +135,9 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
             'ext': 'aac',
             # 'live_status': 'is_live',
             'is_live': True,
-            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
             'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
-            'title': 're:^Classic FM Hall of Fame.+$'
+            'title': 're:Classic FM Hall of Fame.+$'
         },
     }]
 
@@ -160,7 +154,7 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
             'is_live': True,
         }, traverse_obj(station, {
             'title': 'title',
-            'description': 'description',
+            'description': ('description', T(self._clean_desc)),
             'thumbnail': 'image',
         }), rev=True)
 
@@ -177,7 +171,7 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
             'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
             'categories': ['Society & Culture', 'True Crime'],
             'uploader': 'Global',
-            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+            'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
         },
     }, {
         # radio catchup
@@ -203,7 +197,7 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
                         series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
             'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
         }, traverse_obj(series, {
-            'description': 'description',
+            'description': ('description', T(self._clean_desc)),
             'thumbnail': 'imageUrl',
             'title': 'title',
             'uploader': 'itunesAuthor',  # podcasts only
diff --git a/youtube_dl/extractor/whyp.py b/youtube_dl/extractor/whyp.py
index 16f9154ad..644eb4617 100644
--- a/youtube_dl/extractor/whyp.py
+++ b/youtube_dl/extractor/whyp.py
@@ -21,7 +21,7 @@ class WhypIE(InfoExtractor):
             'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
             'id': '18337',
             'title': 'Home Page Example Track',
-            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'description': r're:(?s).+\bexample track\b',
             'ext': 'mp3',
             'duration': 52.82,
             'uploader': 'Brad',
@@ -33,29 +33,6 @@ class WhypIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', fatal=True, traverse=('data', 0)):
-        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
-
-        import functools
-        import json
-        import re
-        from ..utils import (js_to_json, NO_DEFAULT)
-
-        re_ctx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
-        js, arg_keys, arg_vals = self._search_regex(
-            (p.format(re_ctx, FUNCTION_RE) for p in (r'<script>\s*window\.{0}={1}\s*\)\s*;?\s*</script>', r'{0}\(.*?{1}')),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
-            default=NO_DEFAULT if fatal else (None, None, None))
-        if js is None:
-            return {}
-
-        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
-            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
-
-        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
-        return traverse_obj(ret, traverse) or {}
-
     def _real_extract(self, url):
         unique_id = self._match_id(url)
         webpage = self._download_webpage(url, unique_id)

From a190b559640ce1b5fe67e5a4843dc58328503f3c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 19 Jul 2023 13:01:02 +0100
Subject: [PATCH 110/156] [utils] Fix broken Py 3.11+ compat in
 `traverse_obj()`

* inspect.getargspec is missing despite doc claiming backward compat
* replace with emulation of `Signature.bind()`
---
 youtube_dl/utils.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0cbbec0f3..d52fa7a28 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6109,6 +6109,37 @@ def clean_podcast_url(url):
         )/''', '', url)
 
 
+if __debug__:
+    # Raise TypeError if args can't be bound
+    # needs compat owing to unstable inspect API, thanks PSF :-(
+    try:
+        inspect.signature
+
+        def _try_bind_args(fn, *args, **kwargs):
+            inspect.signature(fn).bind(*args, **kwargs)
+    except AttributeError:
+        # Py < 3.3
+        def _try_bind_args(fn, *args, **kwargs):
+            fn_args = inspect.getargspec(fn)
+            # Py2: ArgInfo(args, varargs, keywords, defaults)
+            # Py3: ArgSpec(args, varargs, keywords, defaults)
+            if not fn_args.keywords:
+                for k in kwargs:
+                    if k not in (fn_args.args or []):
+                        raise TypeError("got an unexpected keyword argument: '{0}'".format(k))
+            if not fn_args.varargs:
+                args_to_bind = len(args)
+                bindable = len(fn_args.args or [])
+                if args_to_bind > bindable:
+                    raise TypeError('too many positional arguments')
+                bindable -= len(fn_args.defaults or [])
+                if args_to_bind < bindable:
+                    if kwargs:
+                        bindable -= len(set(fn_args.args or []) & set(kwargs))
+                    if bindable > args_to_bind:
+                        raise TypeError("missing a required argument: '{0}'".format(fn_args.args[args_to_bind]))
+
+
 def traverse_obj(obj, *paths, **kwargs):
     """
     Safely traverse nested `dict`s and `Iterable`s
@@ -6327,10 +6358,7 @@ def traverse_obj(obj, *paths, **kwargs):
 
             if __debug__ and callable(key):
                 # Verify function signature
-                args = inspect.getargspec(key)
-                if len(args.args) != 2:
-                    # crash differently in 2.6 !
-                    inspect.getcallargs(key, None, None)
+                _try_bind_args(key, None, None)
 
             new_objs = []
             for obj in objs:

From b2ba24bb026904f3503db71f65d2b1627f08edf1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 19 Jul 2023 14:14:50 +0100
Subject: [PATCH 111/156] [InfoExtractor] Add `_match_valid_url()` class method
 and refactor

* API compatible with yt-dlp
* also support Sequence of patterns in _VALID_URL
* one place to compile _VALID_URL
* TODO: remove existing extractor shims
---
 devscripts/make_lazy_extractors.py   | 14 ++++++--
 youtube_dl/extractor/common.py       | 51 +++++++++++++++++++++-------
 youtube_dl/extractor/globalplayer.py |  6 ----
 3 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index a8b6ff1b9..1a841a08b 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -4,6 +4,7 @@ from inspect import getsource
 import io
 import os
 from os.path import dirname as dirn
+import re
 import sys
 
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
@@ -29,11 +30,18 @@ from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 with open('devscripts/lazy_load_template.py', 'rt') as f:
     module_template = f.read()
 
+
+def get_source(m):
+    return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
+
+
 module_contents = [
-    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    module_template,
+    get_source(InfoExtractor.suitable),
+    get_source(InfoExtractor._match_valid_url) + '\n',
     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
     # needed for suitable() methods of Youtube extractor (see #28780)
-    'from youtube_dl.utils import parse_qs\n',
+    'from youtube_dl.utils import parse_qs, variadic\n',
 ]
 
 ie_template = '''
@@ -66,7 +74,7 @@ def build_lazy_ie(ie, name):
         valid_url=valid_url,
         module=ie.__module__)
     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += '\n' + getsource(ie.suitable)
+        s += '\n' + get_source(ie.suitable)
     if hasattr(ie, '_make_valid_url'):
         # search extractors
         s += make_valid_template.format(valid_url=ie._make_valid_url())
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 549781186..7f416d312 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -83,6 +83,7 @@ from ..utils import (
     urljoin,
     url_basename,
     url_or_none,
+    variadic,
     xpath_element,
     xpath_text,
     xpath_with_ns,
@@ -371,9 +372,22 @@ class InfoExtractor(object):
     title, description etc.
 
 
-    Subclasses of this one should re-define the _real_initialize() and
-    _real_extract() methods and define a _VALID_URL regexp.
-    Probably, they should also be added to the list of extractors.
+    A subclass of InfoExtractor must be defined to handle each specific site (or
+    several sites). Such a concrete subclass should be added to the list of
+    extractors. It should also:
+    * define its _VALID_URL attribute as a regexp, or a Sequence of alternative
+      regexps (but see below)
+    * re-define the _real_extract() method
+    * optionally re-define the _real_initialize() method.
+
+    An extractor subclass may also override suitable() if necessary, but the
+    function signature must be preserved and the function must import everything
+    it needs (except other extractors), so that lazy_extractors works correctly.
+    If the subclass's suitable() and _real_extract() functions avoid using
+    _VALID_URL, the subclass need not set that class attribute.
+
+    An abstract subclass of InfoExtractor may be used to simplify implementation
+    within an extractor module; it should not be added to the list of extractors.
 
     _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
@@ -408,22 +422,33 @@ class InfoExtractor(object):
         self._x_forwarded_for_ip = None
         self.set_downloader(downloader)
 
+    @classmethod
+    def __match_valid_url(cls, url):
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for cls, whereas getattr would also
+        # match its superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            # _VALID_URL can now be a list/tuple of patterns
+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+        # 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
+        for p in cls._VALID_URL_RE:
+            p = p.match(url)
+            if p:
+                return p
+
+    # The public alias can safely be overridden, as in some back-ports
+    _match_valid_url = __match_valid_url
+
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-
-        # This does not use has/getattr intentionally - we want to know whether
-        # we have cached the regexp for *this* class, whereas getattr would also
-        # match the superclass
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url) is not None
+        # This function must import everything it needs (except other extractors),
+        # so that lazy_extractors works correctly
+        return cls.__match_valid_url(url) is not None
 
     @classmethod
     def _match_id(cls, url):
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        m = cls._VALID_URL_RE.match(url)
+        m = cls.__match_valid_url(url)
         assert m
         return compat_str(m.group('id'))
 
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
index db490b141..ae75dcabf 100644
--- a/youtube_dl/extractor/globalplayer.py
+++ b/youtube_dl/extractor/globalplayer.py
@@ -18,12 +18,6 @@ from ..utils import (
 
 class GlobalPlayerBaseIE(InfoExtractor):
 
-    import re
-
-    @classmethod
-    def _match_valid_url(cls, url):
-        return cls.re.match(cls._VALID_URL, url)
-
     def _get_page_props(self, url, video_id):
         webpage = self._download_webpage(url, video_id)
         return self._search_nextjs_data(webpage, video_id)['props']['pageProps']

From 1fa8b86f0b95f2e1488042ceeda8f356ea2a5448 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 05:29:59 +0100
Subject: [PATCH 112/156] [utils] Remove stray undocumented Host header in
 redirect (fix 46fde7c)

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d52fa7a28..6d798f13a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2996,7 +2996,8 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # Technically the Cookie header should be in unredirected_hdrs;
         # however in practice some may set it in normal headers anyway.
         # We will remove it here to prevent any leaks.
-        remove_headers = ['Cookie']
+        # Also remove unwanted and undocumented Host header for old URL
+        remove_headers = ['Cookie', 'Host']
 
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4

From 74eef6bb5e6b88d042aa13caec667aa3df84ba73 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 12:42:46 +0100
Subject: [PATCH 113/156] [workflows/ci.yml] Extend Python versions

* add 3.10 - 3.12
* use https://pypi.org/project/pynose/ for Py >= 3.9
* test Windows with 3.4
* set defaults (main, both) except push: (all, core)

---
 .github/workflows/ci.yml | 48 +++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c3aabde47..10951d322 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,22 +1,34 @@
 name: CI
 
 env:
-  # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099)
-  # or switching to fork of https://github.com/mdmintz/pynose
-  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
-  main-cpython-versions: 2.7, 3.2, 3.5, 3.9
+  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12
+  main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
   cpython-versions: main
-  test-set: both
+  test-set: core
 
 on:
   push:
+    inputs:
+      cpython-versions:
+        type: string
+        default: all
+      test-set:
+        type: string
+        default: core
   pull_request:
+    inputs:
+      cpython-versions:
+        type: string
+        default: main
+      test-set:
+        type: string
+        default: both
   workflow_dispatch:
     inputs:
       cpython-versions:
         type: choice
-        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9)
+        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11)
         options:
           - all
           - main
@@ -30,7 +42,7 @@ on:
           - core
           - download
         required: true
-        default: core
+        default: both
 
 permissions:
   contents: read
@@ -44,7 +56,8 @@ jobs:
       test-set: ${{ steps.run.outputs.test-set }}
       own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
     steps:
-    - id: run
+    - name: Make version array
+      id: run
       run: |
         # Make a JSON Array from comma/space-separated string (no extra escaping)
         json_list() { \
@@ -66,7 +79,6 @@ jobs:
         # versions with a special get-pip.py in a per-version subdirectory
         printf 'own-pip-versions=%s\n' \
           "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
-
   tests:
     name: Run tests
     needs: select
@@ -82,19 +94,18 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        # outside steps, use github.env...., not env....
         python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
         python-impl: [cpython]
         ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
         run-tests-ext: [sh]
         include:
         - os: windows-2019
-          python-version: 3.2
+          python-version: 3.4
           python-impl: cpython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: bat
         - os: windows-2019
-          python-version: 3.2
+          python-version: 3.4
           python-impl: cpython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: bat
@@ -205,17 +216,14 @@ jobs:
           make install )
         rm -rf $openssl_name
         rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
-
         # Download PyEnv from its GitHub repository.
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
-
         # Prevent pyenv build trying (and failing) to update pip
         export GET_PIP=get-pip-2.6.py
         echo 'import sys; sys.exit(0)' > ${GET_PIP}
         GET_PIP=$(realpath $GET_PIP)
-
         # Build and install Python
         export CFLAGS="-I$openssl_inc"
         export LDFLAGS="-L$openssl_lib"
@@ -322,7 +330,12 @@ jobs:
       run: |
         echo "$PATH"
         echo "$PYTHONHOME"
-        $PIP -qq show nose || $PIP install nose
+        # Use PyNose for recent Pythons instead of Nose
+        py3ver="${{ matrix.python-version }}"
+        py3ver=${py3ver#3.}
+        [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0
+        [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
+        $PIP -qq show $nose || $PIP install $nose
     - name: Install nose for other Python 2
       if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
       shell: bash
@@ -354,7 +367,7 @@ jobs:
           '    def setUp(self):' \
           '        self.ver = os.environ["PYTHON_VER"].split("-")' \
           '    def test_python_ver(self):' \
-          '        self.assertEqual(sys.version[:3], self.ver[-1])' \
+          '        self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
           '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
           '        self.assertIn(self.ver[0], sys.version.lower())' \
           '    def test_python_impl(self):' \
@@ -370,7 +383,6 @@ jobs:
         PYTHON_IMPL: ${{ matrix.python-impl }}
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
-
   flake8:
     name: Linter
     runs-on: ubuntu-latest

From 2a4e9faa773cce60e82453cb32f13e48513c4a46 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 15:49:46 +0100
Subject: [PATCH 114/156] [doc] Update developer guidance

* mention pynose
* mention traverse_obj and add/revise examples

[skip ci]
---
 README.md | 113 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 101 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 14a3d6c86..47e686f84 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex
 You can also use pip:
 
     sudo -H pip install --upgrade youtube-dl
-    
+
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
 macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
@@ -563,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f
  - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
  - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
  - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- - `format` (string): A human-readable description of the format 
+ - `format` (string): A human-readable description of the format
  - `format_id` (string): Format code specified by `--format`
  - `format_note` (string): Additional info about the format
  - `width` (numeric): Width of the video
@@ -675,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
 
 **tl;dr:** [navigate me to examples](#format-selection-examples).
 
-The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. 
+The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
 
 You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
 
@@ -760,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
 
  - Absolute dates: Dates in the format `YYYYMMDD`.
  - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
- 
+
 Examples:
 
 ```bash
@@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests
 
+For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later.
+
 See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
 
 If you want to create a build of youtube-dl yourself, you'll need
@@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions!
 
 ## youtube-dl coding conventions
 
-This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
 Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
@@ -1114,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP
 ```python
 meta = self._download_json(url, video_id)
 ```
-    
+
 Assume at this point `meta`'s layout is:
 
 ```python
@@ -1158,7 +1160,7 @@ description = self._search_regex(
 ```
 
 On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
- 
+
 ### Provide fallbacks
 
 When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
@@ -1206,7 +1208,7 @@ r'(id|ID)=(?P<id>\d+)'
 #### Make regular expressions relaxed and flexible
 
 When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
- 
+
 ##### Example
 
 Say you need to extract `title` from the following HTML code:
@@ -1230,7 +1232,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
 
 The code definitely should not look like:
 
@@ -1331,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`]
 
 Use `url_or_none` for safe URL processing.
 
-Use `try_get` for safe metadata extraction from parsed JSON.
+Use `traverse_obj` for safe metadata extraction from parsed JSON.
 
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. 
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
 
 Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
 
 #### More examples
 
 ##### Safely extract optional description from parsed JSON
+
+When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works: also review usage in the codebase for more examples.
+
+In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available.
+
+```python
+description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str)))
+```
+`T(...)` is a shorthand for a set literal; if you hate people who still run Python 2.6, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`.
+
+Some extractors use the older and less capable `try_get()` function in the same way.
+
 ```python
 description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
 ```
 
 ##### Safely extract more optional metadata
+
+In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid.
+
 ```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {}
+# formerly:
+# video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
 description = video.get('summary')
 duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```
 
+#### Safely extract nested lists
+
+Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`:
+```json
+{
+    "title": "Example video",
+    "comment": "try extracting this",
+    "media": [{
+        "type": "bad",
+        "size": 320,
+        "url": "https://some.cdn.site/bad.mp4"
+    }, {
+        "type": "streaming",
+        "url": "https://some.cdn.site/hls.m3u8"
+    }, {
+        "type": "super",
+        "size": 1280,
+        "url": "https://some.cdn.site/good.webm"
+    }],
+    "moreStuff": "more values",
+    ...
+}
+```
+
+Then extractor code like this can collect the various fields of the JSON:
+```python
+...
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
+...
+        ...
+        info_dict = {}
+        # extract title and description if valid and not empty
+        info_dict.update(traverse_obj(media_json, {
+            'title': ('title', T(txt_or_none)),
+            'description': ('comment', T(txt_or_none)),
+        }))
+
+        # extract any recognisable media formats
+        fmts = []
+        # traverse into "media" list, extract `dict`s with desired keys
+        for fmt in traverse_obj(media_json, ('media', Ellipsis, {
+                'format_id': ('type', T(txt_or_none)),
+                'url': ('url', T(url_or_none)),
+                'width': ('size', T(int_or_none)), })):
+            # bad `fmt` values were `None` and removed
+            if 'url' not in fmt:
+                continue
+            fmt_url = fmt['url']  # known to be valid URL
+            ext = determine_ext(fmt_url)
+            if ext == 'm3u8':
+                fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False))
+            else:
+                fmt['ext'] = ext
+                fmts.append(fmt)
+
+        # sort, raise if no formats
+        self._sort_formats(fmts)
+
+        info_dict['formats'] = fmts
+        ...
+```
+The extractor raises an exception rather than random crashes if the JSON structure changes so that no formats are found.
+
 # EMBEDDING YOUTUBE-DL
 
 youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).

From ca71e56c481c6d5ce69b4756f8f8c0aff97d79b5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 16:36:54 +0100
Subject: [PATCH 115/156] [workflows/ci.yml] Build 3.12 with pyenv

---
 .github/workflows/ci.yml | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 10951d322..a1e21fd4a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -124,7 +124,7 @@ jobs:
     #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
       id: setup-python
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
       # wrap broken actions/setup-python@v4
       uses: ytdl-org/setup-python@v1
       with:
@@ -162,6 +162,42 @@ jobs:
             'import sys' \
             'print(sys.path)' \
             | ${expected} -
+    #-------- Python 3.12 -
+    - name: Set up Python 3.12 environment
+      if: ${{ matrix.python-version == '3.12' }}
+      shell: bash
+      run: |
+        PYENV_ROOT=$HOME/.local/share/pyenv
+        echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+    - name: Cache Python 3.12
+      id: cache312
+      if: ${{ matrix.python-version == '3.12' }}
+      uses: actions/cache@v3
+      with:
+        key: python-3.12
+        path: |
+          ${{ env.PYENV_ROOT }}
+    - name: Build and set up Python 3.12
+      if: ${{ matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+      # dl and build locally
+      shell: bash
+      run: |
+        # Install build environment
+        sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
+                      libncursesw5-dev libreadline-dev libsqlite3-dev   \
+                      libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+        # Download PyEnv from its GitHub repository.
+        export PYENV_ROOT=${{ env.PYENV_ROOT }}
+        export PATH=$PYENV_ROOT/bin:$PATH
+        git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
+        pyenv install 3.12.0b4
+    - name: Locate Python 3.12
+      if: ${{ matrix.python-version == '3.12' }}
+      shell: bash
+      run: |
+        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
+        echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+        echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
     - name: Set up Python 2.7
       if: ${{ matrix.python-version == '2.7' }}
@@ -325,7 +361,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' || matrix.python-version == '3.12' }}
       shell: bash
       run: |
         echo "$PATH"

From 7bce2ad441b874e7a1cf8cc81059c5601d832697 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 18:49:48 +0100
Subject: [PATCH 116/156] [build] Fix various Jython CI and test issues

---
 .github/workflows/ci.yml           | 38 +++++++++++++++++++-----------
 devscripts/make_lazy_extractors.py | 11 +++++++++
 test/test_http.py                  | 13 ++++++----
 youtube_dl/compat.py               |  2 +-
 4 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a1e21fd4a..6b91edd6c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,10 +111,12 @@ jobs:
           run-tests-ext: bat
         # jython
         - os: ubuntu-20.04
+          python-version: 2.7
           python-impl: jython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: sh
         - os: ubuntu-20.04
+          python-version: 2.7
           python-impl: jython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
@@ -163,22 +165,22 @@ jobs:
             'print(sys.path)' \
             | ${expected} -
     #-------- Python 3.12 -
-    - name: Set up Python 3.12 environment
-      if: ${{ matrix.python-version == '3.12' }}
+    - name: Set up CPython 3.12 environment
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       shell: bash
       run: |
         PYENV_ROOT=$HOME/.local/share/pyenv
         echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
     - name: Cache Python 3.12
       id: cache312
-      if: ${{ matrix.python-version == '3.12' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       uses: actions/cache@v3
       with:
         key: python-3.12
         path: |
           ${{ env.PYENV_ROOT }}
     - name: Build and set up Python 3.12
-      if: ${{ matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -192,7 +194,7 @@ jobs:
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
         pyenv install 3.12.0b4
     - name: Locate Python 3.12
-      if: ${{ matrix.python-version == '3.12' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       shell: bash
       run: |
         PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
@@ -200,7 +202,7 @@ jobs:
         echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
     - name: Set up Python 2.7
-      if: ${{ matrix.python-version == '2.7' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }}
       # install 2.7
       shell: bash
       run: |
@@ -208,7 +210,7 @@ jobs:
         echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
     #-------- Python 2.6 --
     - name: Set up Python 2.6 environment
-      if: ${{ matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
       shell: bash
       run: |
         openssl_name=openssl-1.0.2u
@@ -228,7 +230,7 @@ jobs:
           ${{ env.openssl_dir }}
           ${{ env.PYENV_ROOT }}
     - name: Build and set up Python 2.6
-      if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -266,7 +268,7 @@ jobs:
         export LD_LIBRARY_PATH="$openssl_lib"
         pyenv install 2.6.9
     - name: Locate Python 2.6
-      if: ${{ matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
       shell: bash
       run: |
         PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
@@ -288,7 +290,7 @@ jobs:
         echo "PIP=pip" >> "$GITHUB_ENV"
     - name: Cache Jython
       id: cachejy
-      if: ${{ matrix.python-impl == 'jython' }}
+      if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }}
       uses: actions/cache@v3
       with:
         # 2.7.3 now available, may solve SNI issue
@@ -296,7 +298,7 @@ jobs:
         path: |
           ${{ env.JYTHON_ROOT }}
     - name: Install Jython
-      if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }}
       shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
@@ -309,6 +311,11 @@ jobs:
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    - name: Install supporting Python 2.7 if possible
+      if: ${{ steps.cachejy.outputs.cache-hit }}
+      shell: bash
+      run: |
+        sudo apt-get install -y python2.7 || true
     #-------- pip ---------
     - name: Set up supported Python ${{ matrix.python-version }} pip
       if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
@@ -391,6 +398,11 @@ jobs:
       if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       shell: bash
       run: |
+        # set PYTHON_VER
+        PYTHON_VER=${{ matrix.python-version }}
+        [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}"
+        echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV"
+        echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV"
         # define a test to validate the Python version used by nosetests
         printf '%s\n' \
           'from __future__ import unicode_literals' \
@@ -405,7 +417,7 @@ jobs:
           '    def test_python_ver(self):' \
           '        self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
           '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
-          '        self.assertIn(self.ver[0], sys.version.lower())' \
+          '        self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \
           '    def test_python_impl(self):' \
           '        self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
           > test/test_python.py
@@ -415,8 +427,6 @@ jobs:
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       env:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
-        PYTHON_VER: ${{ matrix.python-version }}
-        PYTHON_IMPL: ${{ matrix.python-impl }}
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
   flake8:
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 1a841a08b..dee9d6d91 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -118,3 +118,14 @@ module_src = '\n'.join(module_contents) + '\n'
 
 with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
     f.write(module_src)
+
+# work around JVM byte code module limit in Jython
+if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
+    import subprocess
+    from youtube_dl.compat import compat_subprocess_get_DEVNULL
+    # if Python 2.7 is available, use it to compile the module for Jython
+    try:
+        # best effort only: any failure (eg, no python2.7 executable found) is ignored below
+        subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL())
+    except Exception:
+        pass
diff --git a/test/test_http.py b/test/test_http.py
index 1a6b2e878..4ec8e13e3 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -45,6 +45,7 @@ from youtube_dl.utils import (
 )
 
 from test.helper import (
+    expectedFailureIf,
     FakeYDL,
     FakeLogger,
     http_server_port,
@@ -243,6 +244,11 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 
 
 class TestHTTP(unittest.TestCase):
+    # when does it make sense to check the SSL certificate?
+    _check_cert = (
+        sys.version_info >= (3, 2)
+        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))
+
     def setUp(self):
         # HTTP server
         self.http_httpd = compat_http_server.HTTPServer(
@@ -307,10 +313,7 @@ class TestHTTP(unittest.TestCase):
             else self.https_port if scheme == 'https'
             else self.http_port, path)
 
-    @unittest.skipUnless(
-        sys.version_info >= (3, 2)
-        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 9)),
-        'No support for certificate check in SSL')
+    @unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')
     def test_nocheckcertificate(self):
         with FakeYDL({'logger': FakeLogger()}) as ydl:
             with self.assertRaises(compat_urllib_error.URLError):
@@ -376,6 +379,8 @@ class TestHTTP(unittest.TestCase):
                 with self.assertRaises(compat_urllib_HTTPError):
                     do_req(code, 'GET')
 
+    # Jython 2.7.1 times out for some reason
+    @expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))
     def test_content_type(self):
         # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
         with FakeYDL({'nocheckcertificate': True}) as ydl:
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 1d784d90f..da6d70ec4 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -131,7 +131,7 @@ if sys.version_info[0] == 2 or sys.version_info < (3, 3):
         def load(self, rawdata):
             must_have_value = 0
             if not isinstance(rawdata, dict):
-                if sys.version_info[:2] != (2, 7):
+                if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
                     # attribute must have value for parsing
                     rawdata, must_have_value = re.subn(
                         r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)

From 44faa71b19c866b836e4433ddd3e4722ac6d282f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 19:32:29 +0100
Subject: [PATCH 117/156] [test/test_execution.py] Use
 `compat_subprocess_get_DEVNULL()`

---
 test/test_execution.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/test_execution.py b/test/test_execution.py
index ae59e562a..56e1b679d 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -10,17 +10,14 @@ import os
 import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.compat import compat_register_utf8
+from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
 from youtube_dl.utils import encodeArgument
 
 compat_register_utf8()
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-try:
-    _DEV_NULL = subprocess.DEVNULL
-except AttributeError:
-    _DEV_NULL = open(os.devnull, 'wb')
+_DEV_NULL = compat_subprocess_get_DEVNULL()
 
 
 class TestExecution(unittest.TestCase):

From 2b7dd3b2a2d7c6e228a42d1000a6f3296739ff1c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 24 Jul 2023 03:30:28 +0100
Subject: [PATCH 118/156] [utils] Fix update_Request() with empty data (not
 None)

---
 test/test_http.py   | 13 +++++++++++++
 youtube_dl/utils.py |  7 +++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 4ec8e13e3..89580969d 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -41,6 +41,7 @@ from youtube_dl.compat import (
 
 from youtube_dl.utils import (
     sanitized_Request,
+    update_Request,
     urlencode_postdata,
 )
 
@@ -395,6 +396,18 @@ class TestHTTP(unittest.TestCase):
             headers = ydl.urlopen(r).read().decode('utf-8')
             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
 
+    def test_update_req(self):
+        req = sanitized_Request('http://example.com')
+        assert req.data is None
+        assert req.get_method() == 'GET'
+        assert not req.has_header('Content-Type')
+        # Test that zero-byte payloads will be sent
+        req = update_Request(req, data=b'')
+        assert req.data == b''
+        assert req.get_method() == 'POST'
+        # yt-dl expects data to be encoded and Content-Type to be added by sender
+        # assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'
+
     def test_cookiejar(self):
         with FakeYDL() as ydl:
             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 6d798f13a..b5475434f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2996,8 +2996,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # Technically the Cookie header should be in unredirected_hdrs;
         # however in practice some may set it in normal headers anyway.
         # We will remove it here to prevent any leaks.
-        # Also remove unwanted and undocumented Host header for old URL
-        remove_headers = ['Cookie', 'Host']
+        remove_headers = ['Cookie']
 
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
@@ -3016,7 +3015,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
             remove_headers.extend(['Content-Length', 'Content-Type'])
 
         # NB: don't use dict comprehension for python 2.6 compatibility
-        new_headers = dict((k, v) for k, v in req.header_items()
+        new_headers = dict((k, v) for k, v in req.headers.items()
                            if k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
@@ -4187,7 +4186,7 @@ def update_url_query(url, query):
 def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers = req.headers.copy()
     req_headers.update(headers)
-    req_data = data or req.data
+    req_data = data if data is not None else req.data
     req_url = update_url_query(url or req.get_full_url(), query)
     req_get_method = req.get_method()
     if req_get_method == 'HEAD':

From aac33155e40af3da96a2467dd05faea201815989 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 24 Jul 2023 23:43:36 +0100
Subject: [PATCH 119/156] [build] Add and use `devscripts/utils`

---
 devscripts/__init__.py             |  1 +
 devscripts/make_lazy_extractors.py | 22 +++++------
 devscripts/utils.py                | 62 ++++++++++++++++++++++++++++++
 test/test_execution.py             | 10 +++--
 4 files changed, 80 insertions(+), 15 deletions(-)
 create mode 100644 devscripts/__init__.py
 create mode 100644 devscripts/utils.py

diff --git a/devscripts/__init__.py b/devscripts/__init__.py
new file mode 100644
index 000000000..750dbdca7
--- /dev/null
+++ b/devscripts/__init__.py
@@ -0,0 +1 @@
+# Empty file needed to make devscripts.utils properly importable from outside
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index dee9d6d91..5b8b123a4 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals, print_function
 
 from inspect import getsource
-import io
 import os
 from os.path import dirname as dirn
 import re
@@ -9,17 +8,20 @@ import sys
 
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 lazy_extractors_filename = sys.argv[1]
 if os.path.exists(lazy_extractors_filename):
     os.remove(lazy_extractors_filename)
 # Py2: may be confused by leftover lazy_extractors.pyc
-try:
-    os.remove(lazy_extractors_filename + 'c')
-except OSError:
-    pass
+if sys.version_info[0] < 3:
+    for c in ('c', 'o'):  # stale byte-code next to the module: .pyc, then .pyo
+        try:
+            os.remove(lazy_extractors_filename + c)
+        except OSError:
+            pass
 
+from devscripts.utils import read_file, write_file
 from youtube_dl.compat import compat_register_utf8
 
 compat_register_utf8()
@@ -27,8 +29,7 @@ compat_register_utf8()
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 
-with open('devscripts/lazy_load_template.py', 'rt') as f:
-    module_template = f.read()
+module_template = read_file('devscripts/lazy_load_template.py')
 
 
 def get_source(m):
@@ -114,10 +115,9 @@ for ie in ordered_cls:
 module_contents.append(
     '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
 
-module_src = '\n'.join(module_contents) + '\n'
+module_src = '\n'.join(module_contents)
 
-with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
-    f.write(module_src)
+write_file(lazy_extractors_filename, module_src + '\n')
 
 # work around JVM byte code module limit in Jython
 if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
diff --git a/devscripts/utils.py b/devscripts/utils.py
new file mode 100644
index 000000000..2d072d2e0
--- /dev/null
+++ b/devscripts/utils.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import argparse
+import functools
+import os.path
+import subprocess
+import sys
+
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from youtube_dl.compat import (
+    compat_kwargs,
+    compat_open as open,
+)
+
+
+def read_file(fname):
+    with open(fname, encoding='utf-8') as f:
+        return f.read()
+
+
+def write_file(fname, content, mode='w'):
+    with open(fname, mode, encoding='utf-8') as f:
+        return f.write(content)
+
+
+def read_version(fname='youtube_dl/version.py'):
+    """Get the version without importing the package"""
+    # exec into an explicit namespace: since Python 3.13 (PEP 667) names bound
+    # by a bare exec() inside a function are no longer visible through locals()
+    namespace = {}
+    exec(compile(read_file(fname), fname, 'exec'), namespace)
+    return namespace['__version__']
+
+
+def get_filename_args(has_infile=False, default_outfile=None):
+    parser = argparse.ArgumentParser()
+    if has_infile:
+        parser.add_argument('infile', help='Input file')
+    kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
+    kwargs['help'] = 'Output file'
+    parser.add_argument('outfile', **compat_kwargs(kwargs))
+
+    opts = parser.parse_args()
+    if has_infile:
+        return opts.infile, opts.outfile
+    return opts.outfile
+
+
+def compose_functions(*functions):
+    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
+
+
+def run_process(*args, **kwargs):
+    kwargs.setdefault('text', True)
+    kwargs.setdefault('check', True)
+    kwargs.setdefault('capture_output', True)
+    if kwargs['text']:
+        kwargs.setdefault('encoding', 'utf-8')
+        kwargs.setdefault('errors', 'replace')
+        kwargs = compat_kwargs(kwargs)
+    return subprocess.run(args, **kwargs)
diff --git a/test/test_execution.py b/test/test_execution.py
index 56e1b679d..9daaafa6c 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -8,14 +8,16 @@ import unittest
 import sys
 import os
 import subprocess
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+sys.path.insert(0, rootDir)
 
 from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
 from youtube_dl.utils import encodeArgument
 
 compat_register_utf8()
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 _DEV_NULL = compat_subprocess_get_DEVNULL()
 
@@ -49,10 +51,10 @@ class TestExecution(unittest.TestCase):
             subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
             subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
         finally:
-            for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
+            for x in ('', 'c') if sys.version_info[0] < 3 else ('',):
                 try:
                     os.remove(lazy_extractors + x)
-                except (IOError, OSError):
+                except OSError:
                     pass
 
 

From a25e9f3c84a34d43f78a4e5a6f6c2e98e2a0ade3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 00:17:15 +0100
Subject: [PATCH 120/156] [compat] Use `compat_open()`

---
 devscripts/make_readme.py                  |  4 +++-
 test/helper.py                             |  6 +++---
 test/test_InfoExtractor.py                 | 18 +++++++++---------
 test/test_YoutubeDL.py                     |  7 ++++---
 test/test_download.py                      |  6 +++---
 test/test_swfinterp.py                     | 10 ++++++----
 test/test_unicode_literals.py              | 12 +++++++-----
 test/test_write_annotations.py             |  5 ++---
 test/test_youtube_signature.py             |  9 ++++++---
 youtube_dl/YoutubeDL.py                    | 19 +++++++------------
 youtube_dl/cache.py                        |  8 +++++---
 youtube_dl/extractor/common.py             |  1 +
 youtube_dl/extractor/openload.py           |  1 +
 youtube_dl/postprocessor/embedthumbnail.py |  2 ++
 youtube_dl/postprocessor/ffmpeg.py         |  8 ++++----
 youtube_dl/update.py                       |  7 +++++--
 16 files changed, 68 insertions(+), 55 deletions(-)

diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 8fbce0796..c5d5dd4f1 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -4,6 +4,8 @@ import io
 import sys
 import re
 
+from youtube_dl.compat import compat_open as open
+
 README_FILE = 'README.md'
 helptext = sys.stdin.read()
 
@@ -20,7 +22,7 @@ options = helptext[helptext.index('  General Options:') + 19:]
 options = re.sub(r'(?m)^  (\w.+)$', r'## \1', options)
 options = '# OPTIONS\n' + options + '\n'
 
-with io.open(README_FILE, 'w', encoding='utf-8') as f:
+with open(README_FILE, 'w', encoding='utf-8') as f:
     f.write(header)
     f.write(options)
     f.write(footer)
diff --git a/test/helper.py b/test/helper.py
index aa99001b2..fc55c6b46 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import errno
-import io
 import hashlib
 import json
 import os.path
@@ -14,6 +13,7 @@ import unittest
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import (
+    compat_open as open,
     compat_os_name,
     compat_str,
 )
@@ -29,10 +29,10 @@ def get_params(override=None):
                                    "parameters.json")
     LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                          "local_parameters.json")
-    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
         parameters = json.load(pf)
     if os.path.exists(LOCAL_PARAMETERS_FILE):
-        with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
             parameters.update(json.load(pf))
     if override:
         parameters.update(override)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 34773fbd0..3f96645de 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -3,7 +3,6 @@
 from __future__ import unicode_literals
 
 # Allow direct execution
-import io
 import os
 import sys
 import unittest
@@ -21,6 +20,7 @@ from test.helper import (
 from youtube_dl.compat import (
     compat_etree_fromstring,
     compat_http_server,
+    compat_open as open,
 )
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import (
@@ -902,8 +902,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_m3u8_formats(
                     f.read(), m3u8_url, ext='mp4')
                 self.ie._sort_formats(formats)
@@ -1127,8 +1127,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_mpd_formats(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
@@ -1154,8 +1154,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for f4m_file, f4m_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/f4m/%s.f4m' % f4m_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_f4m_formats(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     f4m_url, None)
@@ -1202,8 +1202,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for xspf_file, xspf_url, expected_entries in _TEST_CASES:
-            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/xspf/%s.xspf' % xspf_file,
+                      mode='r', encoding='utf-8') as f:
                 entries = self.ie._parse_xspf(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 6cf555827..d994682b2 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -22,6 +22,7 @@ from youtube_dl.compat import (
     compat_http_cookiejar_Cookie,
     compat_http_cookies_SimpleCookie,
     compat_kwargs,
+    compat_open as open,
     compat_str,
     compat_urllib_error,
 )
@@ -701,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase):
 
         class SimplePP(PostProcessor):
             def run(self, info):
-                with open(audiofile, 'wt') as f:
+                with open(audiofile, 'w') as f:
                     f.write('EXAMPLE')
                 return [info['filepath']], info
 
         def run_pp(params, PP):
-            with open(filename, 'wt') as f:
+            with open(filename, 'w') as f:
                 f.write('EXAMPLE')
             ydl = YoutubeDL(params)
             ydl.add_post_processor(PP())
@@ -725,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase):
 
         class ModifierPP(PostProcessor):
             def run(self, info):
-                with open(info['filepath'], 'wt') as f:
+                with open(info['filepath'], 'w') as f:
                     f.write('MODIFIED')
                 return [], info
 
diff --git a/test/test_download.py b/test/test_download.py
index d50008307..e0bc8cb95 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -20,15 +20,15 @@ from test.helper import (
 
 
 import hashlib
-import io
 import json
 import socket
 
 import youtube_dl.YoutubeDL
 from youtube_dl.compat import (
     compat_http_client,
-    compat_urllib_error,
     compat_HTTPError,
+    compat_open as open,
+    compat_urllib_error,
 )
 from youtube_dl.utils import (
     DownloadError,
@@ -245,7 +245,7 @@ def generator(test_case, tname):
                 self.assertTrue(
                     os.path.exists(info_json_fn),
                     'Missing info file %s' % info_json_fn)
-                with io.open(info_json_fn, encoding='utf-8') as infof:
+                with open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 expect_info_dict(self, info_dict, tc.get('info_dict', {}))
         finally:
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index 9f18055e6..7c282ee00 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -5,16 +5,18 @@ from __future__ import unicode_literals
 import os
 import sys
 import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 import errno
-import io
 import json
 import re
 import subprocess
 
 from youtube_dl.swfinterp import SWFInterpreter
+from youtube_dl.compat import compat_open as open
 
 
 TEST_DIR = os.path.join(
@@ -43,7 +45,7 @@ def _make_testfunc(testfile):
                     '-static-link-runtime-shared-libraries', as_file])
             except OSError as ose:
                 if ose.errno == errno.ENOENT:
-                    print('mxmlc not found! Skipping test.')
+                    self.skipTest('mxmlc not found!')
                     return
                 raise
 
@@ -51,7 +53,7 @@ def _make_testfunc(testfile):
             swf_content = swf_f.read()
         swfi = SWFInterpreter(swf_content)
 
-        with io.open(as_file, 'r', encoding='utf-8') as as_f:
+        with open(as_file, 'r', encoding='utf-8') as as_f:
             as_content = as_f.read()
 
         def _find_spec(key):
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index c7c2252f5..0c83f2a0c 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -2,14 +2,15 @@ from __future__ import unicode_literals
 
 # Allow direct execution
 import os
+import re
 import sys
 import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import io
-import re
+dirn = os.path.dirname
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+rootDir = dirn(dirn(os.path.abspath(__file__)))
+
+sys.path.insert(0, rootDir)
 
 IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
@@ -24,6 +25,7 @@ IGNORED_DIRS = [
 ]
 
 from test.helper import assertRegexpMatches
+from youtube_dl.compat import compat_open as open
 
 
 class TestUnicodeLiterals(unittest.TestCase):
@@ -41,7 +43,7 @@ class TestUnicodeLiterals(unittest.TestCase):
                     continue
 
                 fn = os.path.join(dirpath, basename)
-                with io.open(fn, encoding='utf-8') as inf:
+                with open(fn, encoding='utf-8') as inf:
                     code = inf.read()
 
                 if "'" not in code and '"' not in code:
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
index 41abdfe3b..68e0a391d 100644
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -11,12 +11,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_params, try_rm
 
 
-import io
-
 import xml.etree.ElementTree
 
 import youtube_dl.YoutubeDL
 import youtube_dl.extractor
+from youtube_dl.compat import compat_open as open
 
 
 class YoutubeDL(youtube_dl.YoutubeDL):
@@ -51,7 +50,7 @@ class TestAnnotations(unittest.TestCase):
         ydl.download([TEST_ID])
         self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
         annoxml = None
-        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+        with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
             annoxml = xml.etree.ElementTree.parse(annof)
         self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
         root = annoxml.getroot()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 5dcabaf95..f45dfec7c 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -8,11 +8,14 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import io
 import re
 import string
 
-from youtube_dl.compat import compat_str, compat_urlretrieve
+from youtube_dl.compat import (
+    compat_open as open,
+    compat_str,
+    compat_urlretrieve,
+)
 
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
@@ -208,7 +211,7 @@ def t_factory(name, sig_func, url_pattern):
 
             if not os.path.exists(fn):
                 compat_urlretrieve(url, fn)
-            with io.open(fn, encoding='utf-8') as testf:
+            with open(fn, encoding='utf-8') as testf:
                 jscode = testf.read()
             self.assertEqual(sig_func(jscode, sig_input), expected_sig)
 
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 98d080f43..6a12f91e4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,11 +4,9 @@
 from __future__ import absolute_import, unicode_literals
 
 import collections
-import contextlib
 import copy
 import datetime
 import errno
-import fileinput
 import io
 import itertools
 import json
@@ -45,6 +43,7 @@ from .compat import (
     compat_kwargs,
     compat_map as map,
     compat_numeric_types,
+    compat_open as open,
     compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
@@ -1977,7 +1976,7 @@ class YoutubeDL(object):
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
-                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                    with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
@@ -1992,7 +1991,7 @@ class YoutubeDL(object):
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
@@ -2019,7 +2018,7 @@ class YoutubeDL(object):
                         try:
                             # Use newline='' to prevent conversion of newline characters
                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
-                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                            with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                 subfile.write(sub_info['data'])
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -2028,7 +2027,7 @@ class YoutubeDL(object):
                         try:
                             sub_data = ie._request_webpage(
                                 sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                            with open(encodeFilename(sub_filename), 'wb') as subfile:
                                 subfile.write(sub_data)
                         except (ExtractorError, IOError, OSError, ValueError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
@@ -2232,12 +2231,8 @@ class YoutubeDL(object):
         return self._download_retcode
 
     def download_with_info_file(self, info_filename):
-        with contextlib.closing(fileinput.FileInput(
-                [info_filename], mode='r',
-                openhook=fileinput.hook_encoded('utf-8'))) as f:
-            # FileInput doesn't have a read method, we can't call json.load
-            # TODO: let's use io.open(), then
-            info = self.filter_requested_info(json.loads('\n'.join(f)))
+        with open(info_filename, encoding='utf-8') as f:
+            info = self.filter_requested_info(json.load(f))
         try:
             self.process_ie_result(info, download=True)
         except DownloadError:
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 4822439d0..54123da0e 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -1,14 +1,16 @@
 from __future__ import unicode_literals
 
 import errno
-import io
 import json
 import os
 import re
 import shutil
 import traceback
 
-from .compat import compat_getenv
+from .compat import (
+    compat_getenv,
+    compat_open as open,
+)
 from .utils import (
     error_to_compat_str,
     expand_path,
@@ -83,7 +85,7 @@ class Cache(object):
         cache_fn = self._get_cache_fn(section, key, dtype)
         try:
             try:
-                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                with open(cache_fn, 'r', encoding='utf-8') as cachef:
                     return self._validate(json.load(cachef), min_ver)
             except ValueError:
                 try:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7f416d312..0eca9f844 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -25,6 +25,7 @@ from ..compat import (
     compat_integer_types,
     compat_http_client,
     compat_map as map,
+    compat_open as open,
     compat_os_name,
     compat_str,
     compat_urllib_error,
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index b05d60435..45b1add73 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -7,6 +7,7 @@ import subprocess
 import tempfile
 
 from ..compat import (
+    compat_open as open,
     compat_urlparse,
     compat_kwargs,
 )
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 5e7b6e2df..b6c60e127 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -18,6 +18,8 @@ from ..utils import (
     shell_quote,
 )
 
+from ..compat import compat_open as open
+
 
 class EmbedThumbnailPPError(PostProcessingError):
     pass
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8c29c8d59..801160e6c 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import io
 import os
 import subprocess
 import time
@@ -9,6 +8,7 @@ import re
 
 from .common import AudioConversionError, PostProcessor
 
+from ..compat import compat_open as open
 from ..utils import (
     encodeArgument,
     encodeFilename,
@@ -493,7 +493,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         chapters = info.get('chapters', [])
         if chapters:
             metadata_filename = replace_extension(filename, 'meta')
-            with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+            with open(metadata_filename, 'w', encoding='utf-8') as f:
                 def ffmpeg_escape(text):
                     return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
 
@@ -636,7 +636,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                 with open(dfxp_file, 'rb') as f:
                     srt_data = dfxp2srt(f.read())
 
-                with io.open(srt_file, 'wt', encoding='utf-8') as f:
+                with open(srt_file, 'w', encoding='utf-8') as f:
                     f.write(srt_data)
                 old_file = srt_file
 
@@ -652,7 +652,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
 
             self.run_ffmpeg(old_file, new_file, ['-f', new_format])
 
-            with io.open(new_file, 'rt', encoding='utf-8') as f:
+            with open(new_file, 'r', encoding='utf-8') as f:
                 subs[lang] = {
                     'ext': new_ext,
                     'data': f.read(),
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 84c964617..b5f26e4a9 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -9,7 +9,10 @@ import subprocess
 import sys
 from zipimport import zipimporter
 
-from .compat import compat_realpath
+from .compat import (
+    compat_open as open,
+    compat_realpath,
+)
 from .utils import encode_compat_str
 
 from .version import __version__
@@ -127,7 +130,7 @@ def update_self(to_screen, verbose, opener):
 
         try:
             bat = os.path.join(directory, 'youtube-dl-updater.bat')
-            with io.open(bat, 'w') as batfile:
+            with open(bat, 'w') as batfile:
                 batfile.write('''
 @echo off
 echo Waiting for file handle to be closed ...

From b87018122995acb7e6a1be3f2464605259b93611 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 00:22:54 +0100
Subject: [PATCH 121/156] [build] Extend use of `devscripts/utils`

---
 devscripts/bash-completion.py            | 11 +++++++----
 devscripts/create-github-release.py      |  9 +++++----
 devscripts/fish-completion.py            | 11 ++++++-----
 devscripts/gh-pages/add-version.py       | 15 ++++++++++-----
 devscripts/gh-pages/generate-download.py | 17 ++++++++++++-----
 devscripts/gh-pages/update-copyright.py  | 17 +++++++++++------
 devscripts/gh-pages/update-feed.py       | 11 ++++++++---
 devscripts/gh-pages/update-sites.py      | 11 ++++++-----
 devscripts/make_contributing.py          |  9 ++++-----
 devscripts/make_issue_template.py        | 17 +++++++----------
 devscripts/make_readme.py                | 11 +++++++----
 devscripts/make_supportedsites.py        | 15 ++++++++-------
 devscripts/prepare_manpage.py            | 10 ++++------
 devscripts/zsh-completion.py             |  8 ++++----
 youtube_dl/update.py                     |  1 -
 15 files changed, 99 insertions(+), 74 deletions(-)

diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 3d1391334..7db396a77 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -5,8 +5,12 @@ import os
 from os.path import dirname as dirn
 import sys
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
+from youtube_dl.compat import compat_open as open
+
+from utils import read_file
 
 BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
 BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
@@ -18,9 +22,8 @@ def build_completion(opt_parser):
         for option in group.option_list:
             # for every long flag
             opts_flag.append(option.get_opt_string())
-    with open(BASH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
-    with open(BASH_COMPLETION_FILE, "w") as f:
+    template = read_file(BASH_COMPLETION_TEMPLATE)
+    with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f:
         # just using the special char
         filled_template = template.replace("{{flags}}", " ".join(opts_flag))
         f.write(filled_template)
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 2ddfa1096..320bcfc27 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import json
 import mimetypes
 import netrc
@@ -10,7 +9,9 @@ import os
 import re
 import sys
 
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 from youtube_dl.compat import (
     compat_basestring,
@@ -22,6 +23,7 @@ from youtube_dl.utils import (
     make_HTTPS_handler,
     sanitized_Request,
 )
+from utils import read_file
 
 
 class GitHubReleaser(object):
@@ -89,8 +91,7 @@ def main():
 
     changelog_file, version, build_path = args
 
-    with io.open(changelog_file, encoding='utf-8') as inf:
-        changelog = inf.read()
+    changelog = read_file(changelog_file)
 
     mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
     body = mobj.group(1) if mobj else ''
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 51d19dd33..267ba6a58 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -6,10 +6,13 @@ import os
 from os.path import dirname as dirn
 import sys
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
 from youtube_dl.utils import shell_quote
 
+from utils import read_file, write_file
+
 FISH_COMPLETION_FILE = 'youtube-dl.fish'
 FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
 
@@ -38,11 +41,9 @@ def build_completion(opt_parser):
             complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
             commands.append(shell_quote(complete_cmd))
 
-    with open(FISH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
+    template = read_file(FISH_COMPLETION_TEMPLATE)
     filled_template = template.replace('{{commands}}', '\n'.join(commands))
-    with open(FISH_COMPLETION_FILE, 'w') as f:
-        f.write(filled_template)
+    write_file(filled_template)
 
 
 parser = youtube_dl.parseOpts()[0]
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py
index 867ea0048..b84908f85 100755
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -6,16 +6,21 @@ import sys
 import hashlib
 import os.path
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl.compat import compat_open as open
 
 if len(sys.argv) <= 1:
     print('Specify the version number as parameter')
     sys.exit()
 version = sys.argv[1]
 
-with open('update/LATEST_VERSION', 'w') as f:
-    f.write(version)
+write_file('update/LATEST_VERSION', version)
 
-versions_info = json.load(open('update/versions.json'))
+versions_info = json.loads(read_file('update/versions.json'))
 if 'signature' in versions_info:
     del versions_info['signature']
 
@@ -39,5 +44,5 @@ for key, filename in filenames.items():
 versions_info['versions'][version] = new_version
 versions_info['latest'] = version
 
-with open('update/versions.json', 'w') as jsonf:
-    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
+with open('update/versions.json', 'w', encoding='utf-8') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py
index a873d32ee..3e38e9299 100755
--- a/devscripts/gh-pages/generate-download.py
+++ b/devscripts/gh-pages/generate-download.py
@@ -2,14 +2,21 @@
 from __future__ import unicode_literals
 
 import json
+import os.path
+import sys
 
-versions_info = json.load(open('update/versions.json'))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+from utils import read_file, write_file
+
+versions_info = json.loads(read_file('update/versions.json'))
 version = versions_info['latest']
 version_dict = versions_info['versions'][version]
 
 # Read template page
-with open('download.html.in', 'r', encoding='utf-8') as tmplf:
-    template = tmplf.read()
+template = read_file('download.html.in')
 
 template = template.replace('@PROGRAM_VERSION@', version)
 template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
@@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0])
 template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
 template = template.replace('@TAR_URL@', version_dict['tar'][0])
 template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
-with open('download.html', 'w', encoding='utf-8') as dlf:
-    dlf.write(template)
+
+write_file('download.html', template)
diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py
index 61487f925..444595c48 100755
--- a/devscripts/gh-pages/update-copyright.py
+++ b/devscripts/gh-pages/update-copyright.py
@@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals
 
 import datetime
 import glob
-import io  # For Python 2 compatibility
 import os
 import re
+import sys
 
-year = str(datetime.datetime.now().year)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl import compat_str
+
+year = compat_str(datetime.datetime.now().year)
 for fn in glob.glob('*.html*'):
-    with io.open(fn, encoding='utf-8') as f:
-        content = f.read()
+    content = read_file(fn)
     newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
     if content != newc:
         tmpFn = fn + '.part'
-        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
-            outf.write(newc)
+        write_file(tmpFn, newc)
         os.rename(tmpFn, fn)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index 506a62377..13a367d34 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -2,10 +2,16 @@
 from __future__ import unicode_literals
 
 import datetime
-import io
 import json
+import os.path
 import textwrap
+import sys
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import write_file
 
 atom_template = textwrap.dedent("""\
     <?xml version="1.0" encoding="utf-8"?>
@@ -72,5 +78,4 @@ for v in versions:
 entries_str = textwrap.indent(''.join(entries), '\t')
 atom_template = atom_template.replace('@ENTRIES@', entries_str)
 
-with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
-    atom_file.write(atom_template)
+write_file('update/releases.atom', atom_template)
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 531c93c70..06a8a474c 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -5,15 +5,17 @@ import sys
 import os
 import textwrap
 
+dirn = os.path.dirname
+
 # We must be able to import youtube_dl
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
 
 import youtube_dl
+from devscripts.utils import read_file, write_file
 
 
 def main():
-    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
-        template = tmplf.read()
+    template = read_file('supportedsites.html.in')
 
     ie_htmls = []
     for ie in youtube_dl.list_extractors(age_limit=None):
@@ -29,8 +31,7 @@ def main():
 
     template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
 
-    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
-        sitesf.write(template)
+    write_file('supportedsites.html', template)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 226d1a5d6..5a9eb194f 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
 import re
 
+from utils import read_file, write_file
+
 
 def main():
     parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
@@ -14,8 +15,7 @@ def main():
 
     infile, outfile = args
 
-    with io.open(infile, encoding='utf-8') as inf:
-        readme = inf.read()
+    readme = read_file(infile)
 
     bug_text = re.search(
         r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
@@ -25,8 +25,7 @@ def main():
 
     out = bug_text + dev_text
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
index b7ad23d83..65fa8169f 100644
--- a/devscripts/make_issue_template.py
+++ b/devscripts/make_issue_template.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
+import os.path
+import sys
+
+from utils import read_file, read_version, write_file
 
 
 def main():
@@ -13,17 +16,11 @@ def main():
 
     infile, outfile = args
 
-    with io.open(infile, encoding='utf-8') as inf:
-        issue_template_tmpl = inf.read()
+    issue_template_tmpl = read_file(infile)
 
-    # Get the version from youtube_dl/version.py without importing the package
-    exec(compile(open('youtube_dl/version.py').read(),
-                 'youtube_dl/version.py', 'exec'))
+    out = issue_template_tmpl % {'version': read_version()}
 
-    out = issue_template_tmpl % {'version': locals()['__version__']}
-
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 if __name__ == '__main__':
     main()
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index c5d5dd4f1..7a5b04dcc 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,9 +1,13 @@
 from __future__ import unicode_literals
 
-import io
-import sys
+import os.path
 import re
+import sys
+dirn = os.path.dirname
 
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import read_file
 from youtube_dl.compat import compat_open as open
 
 README_FILE = 'README.md'
@@ -12,8 +16,7 @@ helptext = sys.stdin.read()
 if isinstance(helptext, bytes):
     helptext = helptext.decode('utf-8')
 
-with io.open(README_FILE, encoding='utf-8') as f:
-    oldreadme = f.read()
+oldreadme = read_file(README_FILE)
 
 header = oldreadme[:oldreadme.index('# OPTIONS')]
 footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 764795bc5..c424d18d7 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
-import os
+import os.path
 import sys
 
-
 # Import youtube_dl
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
 
+from utils import write_file
+
 
 def main():
     parser = optparse.OptionParser(usage='%prog OUTFILE.md')
@@ -38,8 +40,7 @@ def main():
         ' - ' + md + '\n'
         for md in gen_ies_md(ies))
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 76bf873e1..0090ada3e 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-import io
 import optparse
 import os.path
 import re
 
+from utils import read_file, write_file
+
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 README_FILE = os.path.join(ROOT_DIR, 'README.md')
-
 PREFIX = r'''%YOUTUBE-DL(1)
 
 # NAME
@@ -29,8 +29,7 @@ def main():
 
     outfile, = args
 
-    with io.open(README_FILE, encoding='utf-8') as f:
-        readme = f.read()
+    readme = read_file(README_FILE)
 
     readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
     readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
@@ -38,8 +37,7 @@ def main():
 
     readme = filter_options(readme)
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(readme)
+    write_file(outfile, readme)
 
 
 def filter_options(readme):
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 60aaf76cc..ebd552fcb 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -7,6 +7,8 @@ import sys
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 import youtube_dl
+from utils import read_file, write_file
+
 
 ZSH_COMPLETION_FILE = "youtube-dl.zsh"
 ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
@@ -34,15 +36,13 @@ def build_completion(opt_parser):
 
     flags = [opt.get_opt_string() for opt in opts]
 
-    with open(ZSH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
+    template = read_file(ZSH_COMPLETION_TEMPLATE)
 
     template = template.replace("{{fileopts}}", "|".join(fileopts))
     template = template.replace("{{diropts}}", "|".join(diropts))
     template = template.replace("{{flags}}", " ".join(flags))
 
-    with open(ZSH_COMPLETION_FILE, "w") as f:
-        f.write(template)
+    write_file(ZSH_COMPLETION_FILE, template)
 
 
 parser = youtube_dl.parseOpts()[0]
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index b5f26e4a9..a147b5253 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import io
 import json
 import traceback
 import hashlib

From 0861812d7208310a03909502b1610f5e89d04401 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 15:11:15 +0100
Subject: [PATCH 122/156] [build] Fix typo in `devscripts/fish-completion.py`
 (fix 2285605)

---
 devscripts/fish-completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 267ba6a58..ef8a39e0b 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -43,7 +43,7 @@ def build_completion(opt_parser):
 
     template = read_file(FISH_COMPLETION_TEMPLATE)
     filled_template = template.replace('{{commands}}', '\n'.join(commands))
-    write_file(filled_template)
+    write_file(FISH_COMPLETION_FILE, filled_template)
 
 
 parser = youtube_dl.parseOpts()[0]

From 87e578c9b891b29ab6559ac81ed391897b1e1ace Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 10:52:04 +0100
Subject: [PATCH 123/156] [workflows/ci.yml] Update to setup-java@v3

* avoid Node 12 deprecation
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6b91edd6c..a73bedae1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -278,7 +278,7 @@ jobs:
     #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
-      uses: actions/setup-java@v2
+      uses: actions/setup-java@v3
       with:
         java-version: 8
         distribution: 'zulu'

From e7926ae9f4e5fa258696551a39295402819280c9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 06:03:14 +0100
Subject: [PATCH 124/156] [utils] Rework decoding of `Content-Encoding`s

* support nested encodings
* support optional `br` encoding, if brotli package is installed
* support optional 'compress' encoding, if ncompress package is installed
* response `Content-Encoding` has only unprocessed encodings, or removed
* response `Content-Length` is decoded length (usable for filesize metadata)
* use zlib for both deflate and gzip decompression
* some elements taken from yt-dlp: thx especially coletdjnz
---
 test/test_http.py    |  16 ++----
 youtube_dl/compat.py |  14 +++++
 youtube_dl/utils.py  | 120 ++++++++++++++++++++++++++++++++-----------
 3 files changed, 107 insertions(+), 43 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 89580969d..793bea359 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -461,33 +461,23 @@ class TestHTTP(unittest.TestCase):
                 sanitized_Request(
                     self._test_url('content-encoding'),
                     headers={'ytdl-encoding': encoding}))
-            self.assertEqual(res.headers.get('Content-Encoding'), encoding)
+            # decoded encodings are removed: only check for valid decompressed data
             self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
 
     @unittest.skipUnless(brotli, 'brotli support is not installed')
-    @unittest.expectedFailure
     def test_brotli(self):
         self.__test_compression('br')
 
-    @unittest.expectedFailure
     def test_deflate(self):
         self.__test_compression('deflate')
 
-    @unittest.expectedFailure
     def test_gzip(self):
         self.__test_compression('gzip')
 
-    @unittest.expectedFailure  # not yet implemented
     def test_multiple_encodings(self):
         # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
-        with FakeYDL() as ydl:
-            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
-                res = ydl.urlopen(
-                    sanitized_Request(
-                        self._test_url('content-encoding'),
-                        headers={'ytdl-encoding': pair}))
-                self.assertEqual(res.headers.get('Content-Encoding'), pair)
-                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+        for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+            self.__test_compression(pair)
 
     def test_unsupported_encoding(self):
         # it should return the raw content
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index da6d70ec4..54ad64674 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3200,6 +3200,18 @@ except AttributeError:
     def compat_datetime_timedelta_total_seconds(td):
         return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
 
+# optional decompression packages
+# PyPi brotli package implements 'br' Content-Encoding
+try:
+    import brotli as compat_brotli
+except ImportError:
+    compat_brotli = None
+# PyPi ncompress package implements 'compress' Content-Encoding
+try:
+    import ncompress as compat_ncompress
+except ImportError:
+    compat_ncompress = None
+
 
 legacy = [
     'compat_HTMLParseError',
@@ -3234,6 +3246,7 @@ __all__ = [
     'compat_Struct',
     'compat_base64_b64decode',
     'compat_basestring',
+    'compat_brotli',
     'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
@@ -3259,6 +3272,7 @@ __all__ = [
     'compat_itertools_zip_longest',
     'compat_kwargs',
     'compat_map',
+    'compat_ncompress',
     'compat_numeric_types',
     'compat_open',
     'compat_ord',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b5475434f..e73291107 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -15,7 +15,6 @@ import email.utils
 import email.header
 import errno
 import functools
-import gzip
 import inspect
 import io
 import itertools
@@ -42,6 +41,7 @@ from .compat import (
     compat_HTMLParseError,
     compat_HTMLParser,
     compat_basestring,
+    compat_brotli as brotli,
     compat_casefold,
     compat_chr,
     compat_collections_abc,
@@ -55,6 +55,7 @@ from .compat import (
     compat_http_client,
     compat_integer_types,
     compat_kwargs,
+    compat_ncompress as ncompress,
     compat_os_name,
     compat_re_Match,
     compat_re_Pattern,
@@ -2638,11 +2639,44 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             req)
 
     @staticmethod
-    def deflate(data):
+    def deflate_gz(data):
         try:
-            return zlib.decompress(data, -zlib.MAX_WBITS)
+            # format:zlib,gzip + windowsize:32768
+            return data and zlib.decompress(data, 32 + zlib.MAX_WBITS)
         except zlib.error:
-            return zlib.decompress(data)
+            # format:raw deflate (no zlib wrapper) + windowsize:32768 (RFC 9110: "non-conformant")
+            return zlib.decompress(data, -zlib.MAX_WBITS)
+
+    @staticmethod
+    def gzip(data):
+
+        from gzip import GzipFile
+
+        def _gzip(data):
+            with io.BytesIO(data) as data_buf:
+                gz = GzipFile(fileobj=data_buf, mode='rb')
+                return gz.read()
+
+        try:
+            return _gzip(data)
+        except IOError as original_ioerror:
+            # There may be junk at the end of the file
+            # See http://stackoverflow.com/q/4928560/35070 for details
+            for i in range(1, 1024):
+                try:
+                    return _gzip(data[:-i])
+                except IOError:
+                    continue
+            else:
+                raise original_ioerror
+
+    @staticmethod
+    def brotli(data):
+        return data and brotli.decompress(data)
+
+    @staticmethod
+    def compress(data):
+        return data and ncompress.decompress(data)
 
     def http_request(self, req):
         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
@@ -2679,33 +2713,59 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     def http_response(self, req, resp):
         old_resp = resp
-        # gzip
-        if resp.headers.get('Content-encoding', '') == 'gzip':
-            content = resp.read()
-            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
-            try:
-                uncompressed = io.BytesIO(gz.read())
-            except IOError as original_ioerror:
-                # There may be junk at the end of the file
-                # See http://stackoverflow.com/q/4928560/35070 for details
-                for i in range(1, 1024):
-                    try:
-                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
-                        uncompressed = io.BytesIO(gz.read())
-                    except IOError:
-                        continue
-                    break
-                else:
-                    raise original_ioerror
-            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+
+        # Content-Encoding header lists the encodings in order that they were applied [1].
+        # To decompress, we simply do the reverse.
+        # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
+        decoded_response = None
+        decoders = {
+            'gzip': self.deflate_gz,
+            'deflate': self.deflate_gz,
+        }
+        if brotli:
+            decoders['br'] = self.brotli
+        if ncompress:
+            decoders['compress'] = self.compress
+        if sys.platform.startswith('java'):
+            # Jython zlib implementation misses gzip
+            decoders['gzip'] = self.gzip
+
+        def encodings(hdrs):
+            # A header field that allows multiple values can have multiple instances [2].
+            # [2]: https://datatracker.ietf.org/doc/html/rfc9110#name-fields
+            for e in reversed(','.join(hdrs).split(',')):
+                if e:
+                    yield e.strip()
+
+        encodings_left = []
+        try:
+            resp.headers.get_all
+            hdrs = resp.headers
+        except AttributeError:
+            # Py2 has no get_all() method: headers are rfc822.Message
+            from email.message import Message
+            hdrs = Message()
+            for k, v in resp.headers.items():
+                hdrs[k] = v
+
+        decoder, decoded_response = True, None
+        for encoding in encodings(hdrs.get_all('Content-Encoding', [])):
+            # "SHOULD consider" x-compress, x-gzip as compress, gzip
+            decoder = decoder and decoders.get(remove_start(encoding, 'x-'))
+            if not decoder:
+                encodings_left.insert(0, encoding)
+                continue
+            decoded_response = decoder(decoded_response or resp.read())
+        if decoded_response is not None:
+            resp = compat_urllib_request.addinfourl(
+                io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
-            del resp.headers['Content-encoding']
-        # deflate
-        if resp.headers.get('Content-encoding', '') == 'deflate':
-            gz = io.BytesIO(self.deflate(resp.read()))
-            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
-            resp.msg = old_resp.msg
-            del resp.headers['Content-encoding']
+            del resp.headers['Content-Length']
+            resp.headers['Content-Length'] = '%d' % len(decoded_response)
+        del resp.headers['Content-Encoding']
+        if encodings_left:
+            resp.headers['Content-Encoding'] = ', '.join(encodings_left)
+
         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
         # https://github.com/ytdl-org/youtube-dl/issues/6457).
         if 300 <= resp.code < 400:

From abef53466da1f7d2e79f5644718a2cf7524abc49 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 06:19:15 +0100
Subject: [PATCH 125/156] [utils] Rework URL path munging for ., .. components

* move processing to YoutubeDLHandler
* also process `Location` header for redirect
* use tests from https://github.com/yt-dlp/yt-dlp/pull/7662
---
 test/test_http.py       | 14 +++++++++
 test/test_utils.py      | 29 ++++++++++++++++-
 youtube_dl/YoutubeDL.py | 23 --------------
 youtube_dl/utils.py     | 70 +++++++++++++++++++++++++++++++----------
 4 files changed, 95 insertions(+), 41 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 793bea359..485c4c6fc 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -180,6 +180,12 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             respond()
         elif self.path == '/%c7%9f':
             respond()
+        elif self.path == '/redirect_dotsegments':
+            self.send_response(301)
+            # redirect to /headers but with dot segments before
+            self.send_header('Location', '/a/b/./../../headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path.startswith('/redirect_'):
             self._redirect()
         elif self.path.startswith('/method'):
@@ -489,6 +495,14 @@ class TestHTTP(unittest.TestCase):
             self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
             self.assertEqual(res.read(), b'raw')
 
+    def test_remove_dot_segments(self):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))
+            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
+
+            res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))
+            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
+
 
 def _build_proxy_handler(name):
     class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
diff --git a/test/test_utils.py b/test/test_utils.py
index e83977f29..fdae1f744 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -64,6 +64,7 @@ from youtube_dl.utils import (
     parse_age_limit,
     parse_duration,
     parse_filesize,
+    parse_codecs,
     parse_count,
     parse_iso8601,
     parse_resolution,
@@ -114,7 +115,7 @@ from youtube_dl.utils import (
     cli_option,
     cli_valueless_option,
     cli_bool_option,
-    parse_codecs,
+    YoutubeDLHandler,
 )
 from youtube_dl.compat import (
     compat_chr,
@@ -905,6 +906,32 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
+    def test_remove_dot_segments(self):
+
+        def remove_dot_segments(p):
+            q = '' if p.startswith('/') else '/'
+            p = 'http://example.com' + q + p
+            p = compat_urlparse.urlsplit(YoutubeDLHandler._fix_path(p)).path
+            return p[1:] if q else p
+
+        self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+        self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+        self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+        self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+        self.assertEqual(remove_dot_segments('/..'), '/')
+        self.assertEqual(remove_dot_segments('/./'), '/')
+        self.assertEqual(remove_dot_segments('/./a'), '/a')
+        self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+        self.assertEqual(remove_dot_segments('/'), '/')
+        self.assertEqual(remove_dot_segments('/t'), '/t')
+        self.assertEqual(remove_dot_segments('t'), 't')
+        self.assertEqual(remove_dot_segments(''), '')
+        self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+        self.assertEqual(remove_dot_segments('../a'), 'a')
+        self.assertEqual(remove_dot_segments('./a'), 'a')
+        self.assertEqual(remove_dot_segments('.'), '')
+        self.assertEqual(remove_dot_segments('////'), '////')
+
     def test_js_to_json_vars_strings(self):
         self.assertDictEqual(
             json.loads(js_to_json(
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 6a12f91e4..13a41928f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -71,7 +71,6 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
-    HEADRequest,
     int_or_none,
     ISO3166Utils,
     join_nonempty,
@@ -88,7 +87,6 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
-    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2460,27 +2458,6 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
-        # an embedded /../ sequence is not automatically handled by urllib2
-        # see https://github.com/yt-dlp/yt-dlp/issues/3355
-        url = req.get_full_url()
-        parts = url.partition('/../')
-        if parts[1]:
-            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
-        if url:
-            # worse, URL path may have initial /../ against RFCs: work-around
-            # by stripping such prefixes, like eg Firefox
-            parts = compat_urllib_parse.urlsplit(url)
-            path = parts.path
-            while path.startswith('/../'):
-                path = path[3:]
-            url = parts._replace(path=path).geturl()
-            # get a new Request with the munged URL
-            if url != req.get_full_url():
-                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
-                    req.get_method(), compat_urllib_request.Request)
-                req = req_type(
-                    url, data=req.data, headers=dict(req.header_items()),
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e73291107..36204c8fa 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2678,17 +2678,52 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     def compress(data):
         return data and ncompress.decompress(data)
 
+    @staticmethod
+    def _fix_path(url):
+        # an embedded /../ or /./ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        parsed_url = compat_urllib_parse.urlsplit(url)
+        path = parsed_url.path
+        if not path.endswith('/'):
+            path += '/'
+        parts = path.partition('/./')
+        if not parts[1]:
+            parts = path.partition('/../')
+        if parts[1]:
+            path = compat_urllib_parse.urljoin(
+                parts[0] + parts[1][:1],
+                parts[1][1:] + (parts[2] if parsed_url.path.endswith('/') else parts[2][:-1]))
+            url = parsed_url._replace(path=path).geturl()
+        if '/.' in url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            path = parsed_url.path + '/'
+            while path.startswith('/.'):
+                if path.startswith('/../'):
+                    path = path[3:]
+                elif path.startswith('/./'):
+                    path = path[2:]
+                else:
+                    break
+            path = path[:-1]
+            if not path.startswith('/') and parsed_url.path.startswith('/'):
+                path = '/' + path
+            url = parsed_url._replace(path=path).geturl()
+        return url
+
     def http_request(self, req):
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
+        url = req.get_full_url()
+        # resolve embedded . and ..
+        url_fixed = self._fix_path(url)
+        # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not
+        # always respected by websites: some tend to give out URLs with non percent-encoded
         # non-ASCII characters (see telemb.py, ard.py [#3412])
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
         # the code of this workaround has been moved here from YoutubeDL.urlopen()
-        url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = escape_url(url_fixed)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
@@ -2702,10 +2737,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
         req.headers = handle_youtubedl_headers(req.headers)
 
-        if sys.version_info < (2, 7) and '#' in req.get_full_url():
-            # Python 2.6 is brain-dead when it comes to fragments
-            req._Request__original = req._Request__original.partition('#')[0]
-            req._Request__r_type = req._Request__r_type.partition('#')[0]
+        if sys.version_info < (2, 7):
+            # avoid possible race where __r_type may be unset
+            req.get_type()
+            if '#' in req.get_full_url():
+                # Python 2.6 is brain-dead when it comes to fragments
+                req._Request__original = req._Request__original.partition('#')[0]
+                req._Request__r_type = req._Request__r_type.partition('#')[0]
 
         # Use the totally undocumented AbstractHTTPHandler per
         # https://github.com/yt-dlp/yt-dlp/pull/4158
@@ -2775,10 +2813,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 if sys.version_info >= (3, 0):
                     location = location.encode('iso-8859-1')
                 location = location.decode('utf-8')
-                location_escaped = escape_url(location)
+                # resolve embedded . and ..
+                location_fixed = self._fix_path(location)
+                location_escaped = escape_url(location_fixed)
                 if location != location_escaped:
                     del resp.headers['Location']
-                    if sys.version_info < (3, 0):
+                    # if sys.version_info < (3, 0):
+                    if not isinstance(location_escaped, str):
                         location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
@@ -4248,13 +4289,8 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers.update(headers)
     req_data = data if data is not None else req.data
     req_url = update_url_query(url or req.get_full_url(), query)
-    req_get_method = req.get_method()
-    if req_get_method == 'HEAD':
-        req_type = HEADRequest
-    elif req_get_method == 'PUT':
-        req_type = PUTRequest
-    else:
-        req_type = compat_urllib_request.Request
+    req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+        req.get_method(), compat_urllib_request.Request)
     new_req = req_type(
         req_url, data=req_data, headers=req_headers,
         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

From 7d965e6b65655f2a5fbae34219fc87359a3d7061 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:45:57 +0100
Subject: [PATCH 126/156] [utils] Avoid comparing `type(var)`, etc, to pass new
 Linter rules

---
 youtube_dl/swfinterp.py |  2 +-
 youtube_dl/utils.py     | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
index 0c7158575..e79e0b17f 100644
--- a/youtube_dl/swfinterp.py
+++ b/youtube_dl/swfinterp.py
@@ -727,7 +727,7 @@ class SWFInterpreter(object):
                             stack.append(res)
                             continue
 
-                        assert isinstance(obj, (dict, _ScopeDict)),\
+                        assert isinstance(obj, (dict, _ScopeDict)), \
                             'Accessing member %r on %r' % (pname, obj)
                         res = obj.get(pname, undefined)
                         stack.append(res)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 36204c8fa..1da5a7a38 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2235,7 +2235,7 @@ def _htmlentity_transform(entity_with_semicolon):
 def unescapeHTML(s):
     if s is None:
         return None
-    assert type(s) == compat_str
+    assert isinstance(s, compat_str)
 
     return re.sub(
         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
@@ -3418,7 +3418,7 @@ def _windows_write_string(s, out):
 def write_string(s, out=None, encoding=None):
     if out is None:
         out = sys.stderr
-    assert type(s) == compat_str
+    assert isinstance(s, compat_str)
 
     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
         if _windows_write_string(s, out):
@@ -4459,8 +4459,10 @@ TV_PARENTAL_GUIDELINES = {
 
 
 def parse_age_limit(s):
-    if type(s) == int:
-        return s if 0 <= s <= 21 else None
+    if not isinstance(s, bool):
+        age = int_or_none(s)
+        if age is not None:
+            return age if 0 <= age <= 21 else None
     if not isinstance(s, compat_basestring):
         return None
     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)

From 2d2a4bc8324fc4bc5a235cbd1ee0b0769912bfd1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:47:48 +0100
Subject: [PATCH 127/156] [utils] Revise `isinstance()` tests (especially for
 str/unicode/bytes) to complete Linter fix

---
 youtube_dl/compat.py |   2 +-
 youtube_dl/utils.py  | 153 ++++++++++++++++++++-----------------------
 2 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 54ad64674..3c526a78d 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -36,7 +36,7 @@ try:
     )
 except NameError:
     compat_str, compat_basestring, compat_chr = (
-        str, str, chr
+        str, (str, bytes), chr
     )
 
 # casefold
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1da5a7a38..94b339b1d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1826,11 +1826,11 @@ def write_json_file(obj, fn):
     if sys.version_info < (3, 0) and sys.platform != 'win32':
         encoding = get_filesystem_encoding()
         # os.path.basename returns a bytes object, but NamedTemporaryFile
-        # will fail if the filename contains non ascii characters unless we
+        # will fail if the filename contains non-ascii characters unless we
         # use a unicode object
-        path_basename = lambda f: os.path.basename(fn).decode(encoding)
+        path_basename = lambda f: os.path.basename(f).decode(encoding)
         # the same for os.path.dirname
-        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
     else:
         path_basename = os.path.basename
         path_dirname = os.path.dirname
@@ -1894,10 +1894,10 @@ else:
                 return f
         return None
 
+
 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 # the namespace parameter
 
-
 def xpath_with_ns(path, ns_map):
     components = [c.split(':') for c in path.split('/')]
     replaced = []
@@ -1914,7 +1914,7 @@ def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
     def _find_xpath(xpath):
         return node.find(compat_xpath(xpath))
 
-    if isinstance(xpath, (str, compat_str)):
+    if isinstance(xpath, compat_basestring):
         n = _find_xpath(xpath)
     else:
         for xp in xpath:
@@ -2262,39 +2262,32 @@ def get_subprocess_encoding():
     return encoding
 
 
-def encodeFilename(s, for_subprocess=False):
-    """
-    @param s The name of the file
-    """
+# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+if sys.version_info < (3, 0) and not sys.platform.startswith('java'):
 
-    assert type(s) == compat_str
+    def encodeFilename(s, for_subprocess=False):
+        """
+        @param s The name of the file
+        """
+
+        # Pass '' directly to use Unicode APIs on Windows 2000 and up
+        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+        if (not for_subprocess
+                and sys.platform == 'win32'
+                and sys.getwindowsversion()[0] >= 5
+                and isinstance(s, compat_str)):
+            return s
+
+        return _encode_compat_str(s, get_subprocess_encoding(), 'ignore')
+
+    def decodeFilename(b, for_subprocess=False):
+        return _decode_compat_str(b, get_subprocess_encoding(), 'ignore')
+
+else:
 
     # Python 3 has a Unicode API
-    if sys.version_info >= (3, 0):
-        return s
-
-    # Pass '' directly to use Unicode APIs on Windows 2000 and up
-    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
-    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
-    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        return s
-
-    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
-    if sys.platform.startswith('java'):
-        return s
-
-    return s.encode(get_subprocess_encoding(), 'ignore')
-
-
-def decodeFilename(b, for_subprocess=False):
-
-    if sys.version_info >= (3, 0):
-        return b
-
-    if not isinstance(b, bytes):
-        return b
-
-    return b.decode(get_subprocess_encoding(), 'ignore')
+    encodeFilename = decodeFilename = lambda *s, **k: s[0]
 
 
 def encodeArgument(s):
@@ -2313,11 +2306,7 @@ def decodeArgument(b):
 def decodeOption(optval):
     if optval is None:
         return optval
-    if isinstance(optval, bytes):
-        optval = optval.decode(preferredencoding())
-
-    assert isinstance(optval, compat_str)
-    return optval
+    return _decode_compat_str(optval)
 
 
 def formatSeconds(secs):
@@ -2363,7 +2352,7 @@ def make_HTTPS_handler(params, **kwargs):
 
     if sys.version_info < (3, 2):
         return YoutubeDLHTTPSHandler(params, **kwargs)
-    else:  # Python < 3.4
+    else:  # Python3 < 3.4
         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
@@ -2818,8 +2807,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 location_escaped = escape_url(location_fixed)
                 if location != location_escaped:
                     del resp.headers['Location']
-                    # if sys.version_info < (3, 0):
-                    if not isinstance(location_escaped, str):
+                    if not isinstance(location_escaped, str):  # Py 2 case
                         location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
@@ -3086,8 +3074,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # On python 2 urlh.geturl() may sometimes return redirect URL
         # as a byte string instead of unicode. This workaround forces
         # it to return unicode.
-        if sys.version_info[0] < 3:
-            newurl = compat_str(newurl)
+        newurl = _decode_compat_str(newurl)
 
         # Be conciliant with URIs containing a space.  This is mainly
         # redundant with the more complete encoding done in http_error_302(),
@@ -3333,11 +3320,7 @@ class DateRange(object):
 def platform_name():
     """ Returns the platform name as a compat_str """
     res = platform.platform()
-    if isinstance(res, bytes):
-        res = res.decode(preferredencoding())
-
-    assert isinstance(res, compat_str)
-    return res
+    return _decode_compat_str(res)
 
 
 def _windows_write_string(s, out):
@@ -3567,9 +3550,8 @@ def shell_quote(args):
     quoted_args = []
     encoding = get_filesystem_encoding()
     for a in args:
-        if isinstance(a, bytes):
-            # We may get a filename encoded with 'encodeFilename'
-            a = a.decode(encoding)
+        # We may get a filename encoded with 'encodeFilename'
+        a = _decode_compat_str(a, encoding)
         quoted_args.append(compat_shlex_quote(a))
     return ' '.join(quoted_args)
 
@@ -3733,8 +3715,9 @@ def parse_resolution(s):
 
 
 def parse_bitrate(s):
-    if not isinstance(s, compat_str):
-        return
+    s = txt_or_none(s)
+    if not s:
+        return None
     mobj = re.search(r'\b(\d+)\s*kbps', s)
     if mobj:
         return int(mobj.group(1))
@@ -3822,18 +3805,17 @@ def base_url(url):
 
 
 def urljoin(base, path):
-    if isinstance(path, bytes):
-        path = path.decode('utf-8')
-    if not isinstance(path, compat_str) or not path:
+    path = _decode_compat_str(path, encoding='utf-8', or_none=True)
+    if not path:
         return None
     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
         return path
-    if isinstance(base, bytes):
-        base = base.decode('utf-8')
-    if not isinstance(base, compat_str) or not re.match(
-            r'^(?:https?:)?//', base):
+    base = _decode_compat_str(base, encoding='utf-8', or_none=True)
+    if not base:
         return None
-    return compat_urllib_parse.urljoin(base, path)
+    return (
+        re.match(r'^(?:https?:)?//', base)
+        and compat_urllib_parse.urljoin(base, path))
 
 
 class HEADRequest(compat_urllib_request.Request):
@@ -3998,8 +3980,7 @@ def get_exe_version(exe, args=['--version'],
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
     except OSError:
         return False
-    if isinstance(out, bytes):  # Python 2.x
-        out = out.decode('ascii', 'ignore')
+    out = _decode_compat_str(out, 'ascii', 'ignore')
     return detect_exe_version(out, version_re, unrecognized)
 
 
@@ -4218,8 +4199,8 @@ def lowercase_escape(s):
 
 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, compat_str):
-        s = s.encode('utf-8')
+    if sys.version_info < (3, 0):
+        s = _encode_compat_str(s, 'utf-8')
     # ensure unicode: after quoting, it can always be converted
     return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
 
@@ -4242,8 +4223,7 @@ def parse_qs(url, **kwargs):
 
 def read_batch_urls(batch_fd):
     def fixup(url):
-        if not isinstance(url, compat_str):
-            url = url.decode('utf-8', 'replace')
+        url = _decode_compat_str(url, 'utf-8', 'replace')
         BOM_UTF8 = '\xef\xbb\xbf'
         if url.startswith(BOM_UTF8):
             url = url[len(BOM_UTF8):]
@@ -4305,10 +4285,8 @@ def _multipart_encode_impl(data, boundary):
     out = b''
     for k, v in data.items():
         out += b'--' + boundary.encode('ascii') + b'\r\n'
-        if isinstance(k, compat_str):
-            k = k.encode('utf-8')
-        if isinstance(v, compat_str):
-            v = v.encode('utf-8')
+        k = _encode_compat_str(k, 'utf-8')
+        v = _encode_compat_str(v, 'utf-8')
         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
@@ -4435,8 +4413,26 @@ def merge_dicts(*dicts, **kwargs):
     return merged
 
 
-def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
-    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+# very poor choice of name, as if Python string encodings weren't confusing enough
+def encode_compat_str(s, encoding=preferredencoding(), errors='strict'):
+    assert isinstance(s, compat_basestring)
+    return s if isinstance(s, compat_str) else compat_str(s, encoding, errors)
+
+
+# what it could have been
+def _decode_compat_str(s, encoding=preferredencoding(), errors='strict', or_none=False):
+    if not or_none:
+        assert isinstance(s, compat_basestring)
+    return (
+        s if isinstance(s, compat_str)
+        else compat_str(s, encoding, errors) if isinstance(s, compat_basestring)
+        else None)
+
+
+# the real encode_compat_str, but only for internal use
+def _encode_compat_str(s, encoding=preferredencoding(), errors='strict'):
+    assert isinstance(s, compat_basestring)
+    return s.encode(encoding, errors) if isinstance(s, compat_str) else s
 
 
 US_RATINGS = {
@@ -4639,12 +4635,7 @@ def args_to_str(args):
 
 
 def error_to_compat_str(err):
-    err_str = str(err)
-    # On python 2 error byte string must be decoded with proper
-    # encoding rather than ascii
-    if sys.version_info[0] < 3:
-        err_str = err_str.decode(preferredencoding())
-    return err_str
+    return _decode_compat_str(str(err))
 
 
 def mimetype2ext(mt):

From e4178b5af3428f29feca622d531090f10f54af35 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:49:58 +0100
Subject: [PATCH 128/156] [utils] Add and use `filter_dict()` from yt-dlp

---
 youtube_dl/utils.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 94b339b1d..c530ed5a2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2586,7 +2586,7 @@ def handle_youtubedl_headers(headers):
     filtered_headers = headers
 
     if 'Youtubedl-no-compression' in filtered_headers:
-        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+        filtered_headers = filter_dict(filtered_headers, cndn=lambda k, _: k.lower() != 'accept-encoding')
         del filtered_headers['Youtubedl-no-compression']
 
     return filtered_headers
@@ -3102,9 +3102,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
             new_data = None
             remove_headers.extend(['Content-Length', 'Content-Type'])
 
-        # NB: don't use dict comprehension for python 2.6 compatibility
-        new_headers = dict((k, v) for k, v in req.headers.items()
-                           if k.title() not in remove_headers)
+        new_headers = filter_dict(req.headers, cndn=lambda k, _: k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,
@@ -4377,6 +4375,11 @@ def try_get(src, getter, expected_type=None):
                 return v
 
 
+def filter_dict(dct, cndn=lambda _, v: v is not None):
+    # NB: don't use dict comprehension for python 2.6 compatibility
+    return dict((k, v) for k, v in dct.items() if cndn(k, v))
+
+
 def merge_dicts(*dicts, **kwargs):
     """
         Merge the `dict`s in `dicts` using the first valid value for each key.

From 2efc8de4d2299e08e0c84d674d7fc7f3fa669487 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:50:52 +0100
Subject: [PATCH 129/156] [utils] Advertise optional supported
 `Content-Encoding`s

---
 youtube_dl/utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c530ed5a2..81ff78807 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1678,9 +1678,7 @@ def random_user_agent():
 
 std_headers = {
     'User-Agent': random_user_agent(),
-    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
 }
 
@@ -2724,6 +2722,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             if h.capitalize() not in req.headers:
                 req.add_header(h, v)
 
+        # Similarly, 'Accept-encoding'
+        if 'Accept-encoding' not in req.headers:
+            req.add_header(
+                'Accept-Encoding', join_nonempty(
+                    'gzip', 'deflate', brotli and 'br', ncompress and 'compress',
+                    delim=', '))
+
         req.headers = handle_youtubedl_headers(req.headers)
 
         if sys.version_info < (2, 7):

From 86e3cf5e5849aefcc540c19bb5fa5ab7f470d1c1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Aug 2023 22:54:12 +0100
Subject: [PATCH 130/156] [S4C] Add extractor for Sianel Pedwar Cymru

* from https://github.com/yt-dlp/yt-dlp/pull/7730, thx ifan-t, bashonly
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/s4c.py        | 76 ++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 youtube_dl/extractor/s4c.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 42b009ef5..cb39876c2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1087,6 +1087,7 @@ from .rutube import (
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
+from .s4c import S4CIE
 from .safari import (
     SafariIE,
     SafariApiIE,
diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py
new file mode 100644
index 000000000..21d40c2d3
--- /dev/null
+++ b/youtube_dl/extractor/s4c.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    merge_dicts,
+    T,
+    traverse_obj,
+    txt_or_none,
+)
+
+
+class S4CIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/programme/861362209',
+        'info_dict': {
+            'id': '861362209',
+            'ext': 'mp4',
+            'title': 'Y Swn',
+            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
+            'duration': 5340
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/programme/856636948',
+        'info_dict': {
+            'id': '856636948',
+            'ext': 'mp4',
+            'title': 'Am Dro',
+            'duration': 2880,
+            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        details = self._download_json(
+            'https://www.s4c.cymru/df/full_prog_details',
+            video_id, query={
+                'lang': 'e',
+                'programme_id': video_id,
+            }, fatal=False)
+
+        filename = self._download_json(
+            'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
+                'programme_id': video_id,
+                'signed': '0',
+                'lang': 'en',
+                'mode': 'od',
+                'appId': 'clic',
+                'streamName': '',
+            }, note='Downloading player config JSON')['filename']
+        m3u8_url = self._download_json(
+            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
+                'mode': 'od',
+                'application': 'clic',
+                'region': 'WW',
+                'extra': 'false',
+                'thirdParty': 'false',
+                'filename': filename,
+            }, note='Downloading streaming urls JSON')['hls']
+        # ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+        formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {}
+
+        return merge_dicts({
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+        }, traverse_obj(details, ('full_prog_details', 0, {
+            'title': (('programme_title', 'series_title'), T(txt_or_none)),
+            'description': ('full_billing', T(txt_or_none)),
+            'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))),
+        }), get_all=False),
+            rev=True)

From 7d58f0769a8f08e46ea77432041577cef94c07e2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 31 Aug 2023 17:16:47 +0100
Subject: [PATCH 131/156] [ci.yml] Improve conditions for nosetest
 installations

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a73bedae1..7fb8f9f83 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -368,7 +368,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' || matrix.python-version == '3.12' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }}
       shell: bash
       run: |
         echo "$PATH"
@@ -380,7 +380,7 @@ jobs:
         [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
         $PIP -qq show $nose || $PIP install $nose
     - name: Install nose for other Python 2
-      if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }}
       shell: bash
       run: |
         # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)

From 31f50c8194f12c27ac6fbfe336f1d515aa8677ae Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 27 Aug 2023 19:08:28 +0100
Subject: [PATCH 132/156] [S4C] Add thumbnail extraction, extract series as
 playlist

Based on https://github.com/yt-dlp/yt-dlp/pull/7776: thx ifan-t, bashonly
---
 youtube_dl/extractor/extractors.py |  5 ++-
 youtube_dl/extractor/s4c.py        | 62 ++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cb39876c2..d9289e5bf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1087,7 +1087,10 @@ from .rutube import (
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
-from .s4c import S4CIE
+from .s4c import (
+    S4CIE,
+    S4CSeriesIE,
+)
 from .safari import (
     SafariIE,
     SafariApiIE,
diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py
index 21d40c2d3..b152e6680 100644
--- a/youtube_dl/extractor/s4c.py
+++ b/youtube_dl/extractor/s4c.py
@@ -2,6 +2,8 @@
 
 from __future__ import unicode_literals
 
+from functools import partial as partial_f
+
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
@@ -9,6 +11,7 @@ from ..utils import (
     T,
     traverse_obj,
     txt_or_none,
+    url_or_none,
 )
 
 
@@ -21,7 +24,8 @@ class S4CIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Y Swn',
             'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
-            'duration': 5340
+            'duration': 5340,
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
         },
     }, {
         'url': 'https://www.s4c.cymru/clic/programme/856636948',
@@ -31,6 +35,7 @@ class S4CIE(InfoExtractor):
             'title': 'Am Dro',
             'duration': 2880,
             'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
         },
     }]
 
@@ -43,7 +48,7 @@ class S4CIE(InfoExtractor):
                 'programme_id': video_id,
             }, fatal=False)
 
-        filename = self._download_json(
+        player_config = self._download_json(
             'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
                 'programme_id': video_id,
                 'signed': '0',
@@ -51,7 +56,8 @@ class S4CIE(InfoExtractor):
                 'mode': 'od',
                 'appId': 'clic',
                 'streamName': '',
-            }, note='Downloading player config JSON')['filename']
+            }, note='Downloading player config JSON')
+
         m3u8_url = self._download_json(
             'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                 'mode': 'od',
@@ -59,18 +65,60 @@ class S4CIE(InfoExtractor):
                 'region': 'WW',
                 'extra': 'false',
                 'thirdParty': 'false',
-                'filename': filename,
+                'filename': player_config['filename'],
             }, note='Downloading streaming urls JSON')['hls']
-        # ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
-        formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {}
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
+            subtitles.setdefault(sub.get('3', 'en'), []).append({
+                'url': sub['0'],
+                'name': sub.get('1'),
+            })
 
         return merge_dicts({
             'id': video_id,
             'formats': formats,
             'subtitles': subtitles,
+            'thumbnail': url_or_none(player_config.get('poster')),
         }, traverse_obj(details, ('full_prog_details', 0, {
             'title': (('programme_title', 'series_title'), T(txt_or_none)),
             'description': ('full_billing', T(txt_or_none)),
-            'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))),
+            'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
         }), get_all=False),
             rev=True)
+
+
+class S4CSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/series/864982911',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': '864982911',
+            'title': 'Iaith ar Daith',
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/series/866852587',
+        'playlist_mincount': 8,
+        'info_dict': {
+            'id': '866852587',
+            'title': 'FFIT Cymru',
+        },
+    }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        series_details = self._download_json(
+            'https://www.s4c.cymru/df/series_details', series_id, query={
+                'lang': 'e',
+                'series_id': series_id,
+                'show_prog_in_series': 'Y'
+            }, note='Downloading series details JSON')
+
+        return self.playlist_result(
+            (self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id)
+             for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))),
+            playlist_id=series_id, playlist_title=traverse_obj(
+                series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none))))

From 21caaf23800c95451cec27dfac56df2c0f8de85a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 01:13:40 +0100
Subject: [PATCH 133/156] [test] Remove redundancy from lambda expected value
 regex

---
 test/helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/helper.py b/test/helper.py
index fc55c6b46..5b7e3dfe2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -142,7 +142,7 @@ def expect_value(self, got, expected, field):
         self.assertTrue(
             contains_str in got,
             'field %s (value: %r) should contain %r' % (field, got, contains_str))
-    elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
+    elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
         fn = eval(expected)
         suite = expected.split(':', 1)[1].strip()
         self.assertTrue(

From bbd3e7e9999877104e1e47a8ed49f3b90257f083 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 01:18:22 +0100
Subject: [PATCH 134/156] [utils] Properly handle list values in update_url()

An actual list value in a query update could have been treated
as a list of values because of the key:list parse_qs format.
---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 81ff78807..fdf41b025 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4257,7 +4257,7 @@ def update_url(url, **kwargs):
     query = kwargs.pop('query_update', None)
     if query:
         qs = compat_parse_qs(url.query)
-        qs.update(query)
+        qs.update((k, [v]) for k, v in query.items())
         kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
         kwargs = compat_kwargs(kwargs)
     return compat_urllib_parse.urlunparse(url._replace(**kwargs))

From 66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 23:15:19 +0100
Subject: [PATCH 135/156] [utils] Revert bbd3e7e, updating docstring, test
 instead

---
 test/test_utils.py  | 46 ++++++++++++++++++++++-----------------------
 youtube_dl/utils.py |  3 ++-
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index fdae1f744..102420fcb 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -62,13 +62,14 @@ from youtube_dl.utils import (
     OnDemandPagedList,
     orderedSet,
     parse_age_limit,
+    parse_bitrate,
     parse_duration,
     parse_filesize,
     parse_codecs,
     parse_count,
     parse_iso8601,
     parse_resolution,
-    parse_bitrate,
+    parse_qs,
     pkcs1pad,
     prepend_extension,
     read_batch_urls,
@@ -125,7 +126,6 @@ from youtube_dl.compat import (
     compat_setenv,
     compat_str,
     compat_urlparse,
-    compat_parse_qs,
 )
 
 
@@ -683,38 +683,36 @@ class TestUtil(unittest.TestCase):
         self.assertTrue(isinstance(data, bytes))
 
     def test_update_url_query(self):
-        def query_dict(url):
-            return compat_parse_qs(compat_urlparse.urlparse(url).query)
-        self.assertEqual(query_dict(update_url_query(
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
-            query_dict('http://example.com/path?quality=HD&format=mp4'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
-            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': 'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path?manifest=f4m', {'manifest': []})),
-            query_dict('http://example.com/path'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
-            query_dict('http://example.com/path?system=LINUX'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': b'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'width': 1080, 'height': 720})),
-            query_dict('http://example.com/path?width=1080&height=720'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'bitrate': 5020.43})),
-            query_dict('http://example.com/path?bitrate=5020.43'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'test': '第二行тест'})),
-            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+            parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
 
     def test_multipart_encode(self):
         self.assertEqual(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index fdf41b025..443d2609c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4248,6 +4248,7 @@ def update_url(url, **kwargs):
        url: compat_str or parsed URL tuple
        if query_update is in kwargs, update query with
        its value instead of replacing (overrides any `query`)
+       NB: query_update expects parse_qs() format: {key: value_list, ...}
        returns: compat_str
     """
     if not kwargs:
@@ -4257,7 +4258,7 @@ def update_url(url, **kwargs):
     query = kwargs.pop('query_update', None)
     if query:
         qs = compat_parse_qs(url.query)
-        qs.update((k, [v]) for k, v in query.items())
+        qs.update(query)
         kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
         kwargs = compat_kwargs(kwargs)
     return compat_urllib_parse.urlunparse(url._replace(**kwargs))

From 00ef748cc0e35ee60efd0f7a00e373ab8d1af86b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 24 Sep 2023 22:00:13 +0100
Subject: [PATCH 136/156] [downloader] Fix baa6c5e: show ETA of http download
 as ETA instead of total d/l time

---
 youtube_dl/downloader/common.py | 2 +-
 youtube_dl/downloader/http.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index afb4ee33d..91e691776 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -96,7 +96,7 @@ class FileDownloader(object):
                 return None
             return int(float(remaining) / rate)
         start, now = (start_or_rate, now_or_remaining)
-        total, current = args
+        total, current = args[:2]
         if total is None:
             return None
         if now is None:
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 28a49b9e8..3cad87420 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -294,7 +294,7 @@ class HttpFD(FileDownloader):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
 
                 self._hook_progress({
                     'status': 'downloading',

From b7fca0fab36c71fee02d6ecf81acbbaa46942be4 Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Wed, 15 Nov 2023 18:54:31 -0500
Subject: [PATCH 137/156] [Youtube] Update consent cookie handling to match
 site

Apologies for force push!
[skip ci]
---
 youtube_dl/extractor/youtube.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9c419c002..3bf483c1c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -260,16 +260,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         cookies = self._get_cookies('https://www.youtube.com/')
         if cookies.get('__Secure-3PSID'):
             return
-        consent_id = None
-        consent = cookies.get('CONSENT')
-        if consent:
-            if 'YES' in consent.value:
-                return
-            consent_id = self._search_regex(
-                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
-        if not consent_id:
-            consent_id = random.randint(100, 999)
-        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+        socs = cookies.get('SOCS')
+        if socs and not socs.value.startswith('CAA'):  # not consented
+            return
+        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
 
     def _real_initialize(self):
         self._initialize_consent()

From 4e115e18cbb02ecde30edb736a030cf84bf813e9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 18 Oct 2023 14:28:10 +0100
Subject: [PATCH 138/156] [workflows/ci.yml] Run apt-get update before
 installing

---
 .github/workflows/ci.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7fb8f9f83..f00fd0c6b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -121,6 +121,12 @@ jobs:
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
     steps:
+    - name: Prepare Linux
+      if: ${{ startswith(matrix.os, 'ubuntu') }}
+      shell: bash
+      run: |
+        # apt in runner, if needed, may not be up-to-date
+        sudo apt-get update
     - name: Checkout
       uses: actions/checkout@v3
     #-------- Python 3 -----
@@ -128,6 +134,7 @@ jobs:
       id: setup-python
       if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
       # wrap broken actions/setup-python@v4
+      # NB may run apt-get install in Linux
       uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}

From 8d227cb97b00a36fa9389bcba2a63ef6db3dbff7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 16:17:07 +0000
Subject: [PATCH 139/156] [workflows/ci.yml] Actually use default values for
 push and pull_request

---
 .github/workflows/ci.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f00fd0c6b..ca52e0e43 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ env:
 
 on:
   push:
+    # push inputs aren't known to GitHub
     inputs:
       cpython-versions:
         type: string
@@ -17,6 +18,7 @@ on:
         type: string
         default: core
   pull_request:
+    # pull_request inputs aren't known to GitHub
     inputs:
       cpython-versions:
         type: string
@@ -56,6 +58,23 @@ jobs:
       test-set: ${{ steps.run.outputs.test-set }}
       own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
     steps:
+    # push and pull_request inputs aren't known to GitHub (pt3)
+    - name: Set push defaults
+      if: ${{ github.event_name == 'push' }}
+      env:
+        cpython-versions: all
+        test-set: core
+      run: |
+        echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+        echo "test-set=${{env.test-set}}" >> "$GITHUB_ENV"
+    - name: Get pull_request inputs
+      if: ${{ github.event_name == 'pull_request' }}
+      env:
+        cpython-versions: main
+        test-set: both
+      run: |
+        echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+        echo "test-set=${{env.test-set}}" >> "$GITHUB_ENV"
     - name: Make version array
       id: run
       run: |
@@ -79,6 +98,7 @@ jobs:
         # versions with a special get-pip.py in a per-version subdirectory
         printf 'own-pip-versions=%s\n' \
           "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
+
   tests:
     name: Run tests
     needs: select

From c6538ed323409707fc73e81fb7c93bc62ad11ac1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 18:06:40 +0000
Subject: [PATCH 140/156] [workflows/ci.yml] Use setup-python for now released
 Python 3.12

---
 .github/workflows/ci.yml | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca52e0e43..93562afd7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,6 +6,9 @@ env:
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
   cpython-versions: main
   test-set: core
+  # Python beta version to be built using pyenv before setup-python support
+  # Must also be included in all-cpython-versions 
+  next: 3.13
 
 on:
   push:
@@ -152,7 +155,7 @@ jobs:
     #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
       id: setup-python
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }}
       # wrap broken actions/setup-python@v4
       # NB may run apt-get install in Linux
       uses: ytdl-org/setup-python@v1
@@ -191,23 +194,23 @@ jobs:
             'import sys' \
             'print(sys.path)' \
             | ${expected} -
-    #-------- Python 3.12 -
-    - name: Set up CPython 3.12 environment
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+    #-------- Python next (was 3.12) -
+    - name: Set up CPython 3.next environment
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       shell: bash
       run: |
         PYENV_ROOT=$HOME/.local/share/pyenv
         echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
-    - name: Cache Python 3.12
-      id: cache312
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+    - name: Cache Python 3.next 
+      id: cachenext
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       uses: actions/cache@v3
       with:
-        key: python-3.12
+        key: python-${{ env.next }}
         path: |
           ${{ env.PYENV_ROOT }}
-    - name: Build and set up Python 3.12
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+    - name: Build and set up Python 3.next
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -219,12 +222,13 @@ jobs:
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
-        pyenv install 3.12.0b4
-    - name: Locate Python 3.12
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+        pyenv install ${{ env.next }}
+    - name: Locate Python 3.next
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       shell: bash
       run: |
-        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
+        PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)"
+        test -n "$PYTHONHOME"
         echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
         echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
@@ -395,7 +399,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }}
       shell: bash
       run: |
         echo "$PATH"

From 427472351ce6b2fcf5bb35dde32bf9ee5beddd89 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 17:26:37 +0000
Subject: [PATCH 141/156] [utils] Make restricted filenames ignore characters
 in Unicode categories Mark, Other

Resolves #32629
---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 443d2609c..61b94d84c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2121,7 +2121,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
             return '_'
         if restricted and ord(char) > 127:
-            return '_'
+            return '' if unicodedata.category(char)[0] in 'CM' else '_'
+
         return char
 
     # Replace look-alike Unicode glyphs

From c62936a5f20d941e67d566e74a7c3fc8d8188f7a Mon Sep 17 00:00:00 2001
From: mimvahedi <61986916+mimvahedi@users.noreply.github.com>
Date: Sat, 2 Dec 2023 18:55:09 +0330
Subject: [PATCH 142/156] [telewebion] Fix extraction (#32634)

* [telewebion] fix extraction

Resolves https://github.com/ytdl-org/youtube-dl/issues/5135#issuecomment-932952119

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/telewebion.py | 47 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py
index 1207b1a1b..30192d74e 100644
--- a/youtube_dl/extractor/telewebion.py
+++ b/youtube_dl/extractor/telewebion.py
@@ -3,17 +3,23 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    url_or_none,
+)
+
 
 class TelewebionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?telewebion\.com/(episode|clip)/(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
-        'url': 'http://www.telewebion.com/#!/episode/1263668/',
+        'url': 'http://www.telewebion.com/episode/0x1b3139c/',
         'info_dict': {
-            'id': '1263668',
+            'id': '0x1b3139c',
             'ext': 'mp4',
             'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://static\.telewebion\.com/episodeImages/.*/default',
             'view_count': int,
         },
         'params': {
@@ -25,31 +31,24 @@ class TelewebionIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        secure_token = self._download_webpage(
-            'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
-        episode_details = self._download_json(
-            'http://m.s2.telewebion.com/op/op', video_id,
-            query={'action': 'getEpisodeDetails', 'episode_id': video_id})
+        episode_details = self._download_json('https://gateway.telewebion.ir/kandoo/episode/getEpisodeDetail/?EpisodeId={0}'.format(video_id), video_id)
+        episode_details = episode_details['body']['queryEpisode'][0]
 
-        m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
-            video_id, episode_details['file_path'], secure_token)
+        channel_id = episode_details['channel']['descriptor']
+        episode_image_id = episode_details.get('image')
+        episode_image = 'https://static.telewebion.com/episodeImages/{0}/default'.format(episode_image_id) if episode_image_id else None
+
+        m3u8_url = 'https://cdna.telewebion.com/{0}/episode/{1}/playlist.m3u8'.format(channel_id, video_id)
         formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4', m3u8_id='hls')
-
-        picture_paths = [
-            episode_details.get('picture_path'),
-            episode_details.get('large_picture_path'),
-        ]
-
-        thumbnails = [{
-            'url': picture_path,
-            'preference': idx,
-        } for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
+            m3u8_url, video_id, ext='mp4', m3u8_id='hls',
+            entry_protocol='m3u8_native')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': episode_details['title'],
             'formats': formats,
-            'thumbnails': thumbnails,
-            'view_count': episode_details.get('view_count'),
+            'thumbnail': url_or_none(episode_image),
+            'view_count': int_or_none(episode_details.get('view_count')),
+            'duration': float_or_none(episode_details.get('duration')),
         }

From 55a442adaea1eb3dae332fe00179f6dbd437b398 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 5 Dec 2023 20:02:30 +0000
Subject: [PATCH 143/156] [Imgur] Overhaul extractor module (#32612)

Revise extractors for new API and page formats
---
 youtube_dl/extractor/imgur.py | 348 +++++++++++++++++++++++++++-------
 1 file changed, 279 insertions(+), 69 deletions(-)

diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index a5ba03efa..59f129d6a 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -1,101 +1,267 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
+    ExtractorError,
+    float_or_none,
     int_or_none,
     js_to_json,
+    merge_dicts,
     mimetype2ext,
-    ExtractorError,
+    parse_iso8601,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
 )
 
 
-class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
+class ImgurBaseIE(InfoExtractor):
+    # hard-coded value, as also used by ArchiveTeam
+    _CLIENT_ID = '546c25a59c58ad7'
+
+    @classmethod
+    def _imgur_result(cls, item_id):
+        return cls.url_result('imgur:%s' % item_id, ImgurIE.ie_key(), item_id)
+
+    def _call_api(self, endpoint, video_id, **kwargs):
+        return self._download_json(
+            'https://api.imgur.com/post/v1/%s/%s?client_id=%s&include=media,account' % (endpoint, video_id, self._CLIENT_ID),
+            video_id, **kwargs)
+
+    @staticmethod
+    def get_description(s):
+        if 'Discover the magic of the internet at Imgur' in s:
+            return None
+        return txt_or_none(s)
+
+
+class ImgurIE(ImgurBaseIE):
+    _VALID_URL = r'''(?x)
+        (?:
+            https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)|
+            imgur:
+        )(?P<id>[a-zA-Z0-9]+)
+    '''
 
     _TESTS = [{
-        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'url': 'https://imgur.com/A61SaA1',
         'info_dict': {
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'timestamp': 1416446068,
+            'upload_date': '20141120',
         },
     }, {
-        'url': 'https://imgur.com/A61SaA1',
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
         'only_matching': True,
     }, {
         'url': 'https://i.imgur.com/crGpqCV.mp4',
         'only_matching': True,
     }, {
-        # no title
+        # previously, no title
         'url': 'https://i.imgur.com/jxBXAMC.gifv',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'jxBXAMC',
+            'ext': 'mp4',
+            'title': 'Fahaka puffer feeding',
+            'timestamp': 1533835503,
+            'upload_date': '20180809',
+        },
     }]
 
+    def _extract_twitter_formats(self, html, tw_id='twitter', **kwargs):
+        fatal = kwargs.pop('fatal', False)
+        tw_stream = self._html_search_meta('twitter:player:stream', html, fatal=fatal, **kwargs)
+        if not tw_stream:
+            return []
+        ext = mimetype2ext(self._html_search_meta(
+            'twitter:player:stream:content_type', html, default=None))
+        width, height = (int_or_none(self._html_search_meta('twitter:player:' + v, html, default=None))
+                         for v in ('width', 'height'))
+        return [{
+            'format_id': tw_id,
+            'url': tw_stream,
+            'ext': ext or determine_ext(tw_stream),
+            'width': width,
+            'height': height,
+        }]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        data = self._call_api('media', video_id, fatal=False, expected_status=404)
         webpage = self._download_webpage(
-            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
+            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id, fatal=not data) or ''
 
-        width = int_or_none(self._og_search_property(
-            'video:width', webpage, default=None))
-        height = int_or_none(self._og_search_property(
-            'video:height', webpage, default=None))
+        if not traverse_obj(data, ('media', 0, (
+                ('type', T(lambda t: t == 'video' or None)),
+                ('metadata', 'is_animated'))), get_all=False):
+            raise ExtractorError(
+                '%s is not a video or animated image' % video_id,
+                expected=True)
+
+        media_fmt = traverse_obj(data, ('media', 0, {
+            'url': ('url', T(url_or_none)),
+            'ext': 'ext',
+            'width': ('width', T(int_or_none)),
+            'height': ('height', T(int_or_none)),
+            'filesize': ('size', T(int_or_none)),
+            'acodec': ('metadata', 'has_sound', T(lambda b: None if b else 'none')),
+        }))
+
+        media_url = traverse_obj(media_fmt, 'url')
+        if media_url:
+            if not media_fmt.get('ext'):
+                media_fmt['ext'] = mimetype2ext(traverse_obj(
+                    data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
+            if traverse_obj(data, ('media', 0, 'type')) == 'image':
+                media_fmt['acodec'] = 'none'
+                media_fmt.setdefault('preference', -10)
+
+        tw_formats = self._extract_twitter_formats(webpage)
+        if traverse_obj(tw_formats, (0, 'url')) == media_url:
+            tw_formats = []
+        else:
+            # maybe this isn't an animated image/video?
+            self._check_formats(tw_formats, video_id)
 
         video_elements = self._search_regex(
             r'(?s)<div class="video-elements">(.*?)</div>',
             webpage, 'video elements', default=None)
-        if not video_elements:
+        if not (video_elements or tw_formats or media_url):
             raise ExtractorError(
-                'No sources found for video %s. Maybe an image?' % video_id,
+                'No sources found for video %s. Maybe a plain image?' % video_id,
                 expected=True)
 
-        formats = []
-        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
-            formats.append({
-                'format_id': m.group('type').partition('/')[2],
-                'url': self._proto_relative_url(m.group('src')),
-                'ext': mimetype2ext(m.group('type')),
-                'width': width,
-                'height': height,
+        def mung_format(fmt, *extra):
+            fmt.update({
                 'http_headers': {
                     'User-Agent': 'youtube-dl (like wget)',
                 },
             })
+            for d in extra:
+                fmt.update(d)
+            return fmt
 
-        gif_json = self._search_regex(
-            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
-            webpage, 'GIF code', fatal=False)
-        if gif_json:
-            gifd = self._parse_json(
-                gif_json, video_id, transform_source=js_to_json)
-            formats.append({
-                'format_id': 'gif',
-                'preference': -10,
-                'width': width,
-                'height': height,
-                'ext': 'gif',
-                'acodec': 'none',
-                'vcodec': 'gif',
-                'container': 'gif',
-                'url': self._proto_relative_url(gifd['gifUrl']),
-                'filesize': gifd.get('size'),
-                'http_headers': {
-                    'User-Agent': 'youtube-dl (like wget)',
-                },
-            })
+        if video_elements:
+            def og_get_size(media_type):
+                return dict((p, int_or_none(self._og_search_property(
+                    ':'.join((media_type, p)), webpage, default=None)))
+                    for p in ('width', 'height'))
+
+            size = og_get_size('video')
+            if all(v is None for v in size.values()):
+                size = og_get_size('image')
+
+            formats = traverse_obj(
+                re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
+                (Ellipsis, {
+                    'format_id': ('type', T(lambda s: s.partition('/')[2])),
+                    'url': ('src', T(self._proto_relative_url)),
+                    'ext': ('type', T(mimetype2ext)),
+                }, T(lambda f: mung_format(f, size))))
+
+            gif_json = self._search_regex(
+                r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+                webpage, 'GIF code', fatal=False)
+            MUST_BRANCH = (None, T(lambda _: None))
+            formats.extend(traverse_obj(gif_json, (
+                T(lambda j: self._parse_json(
+                    j, video_id, transform_source=js_to_json, fatal=False)), {
+                        'url': ('gifUrl', T(self._proto_relative_url)),
+                        'filesize': ('size', T(int_or_none)),
+                }, T(lambda f: mung_format(f, size, {
+                    'format_id': 'gif',
+                    'preference': -10,  # gifs are worse than videos
+                    'ext': 'gif',
+                    'acodec': 'none',
+                    'vcodec': 'gif',
+                    'container': 'gif',
+                })), MUST_BRANCH)))
+        else:
+            formats = []
+
+        # maybe add formats from JSON or page Twitter metadata
+        if not any((u == media_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
+            formats.append(mung_format(media_fmt))
+        tw_url = traverse_obj(tw_formats, (0, 'url'))
+        if not any((u == tw_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
+            formats.extend(mung_format(f) for f in tw_formats)
 
         self._sort_formats(formats)
 
-        return {
+        return merge_dicts(traverse_obj(data, {
+            'uploader_id': ('account_id', T(txt_or_none),
+                            T(lambda a: a if int_or_none(a) != 0 else None)),
+            'uploader': ('account', 'username', T(txt_or_none)),
+            'uploader_url': ('account', 'avatar_url', T(url_or_none)),
+            'like_count': ('upvote_count', T(int_or_none)),
+            'dislike_count': ('downvote_count', T(int_or_none)),
+            'comment_count': ('comment_count', T(int_or_none)),
+            'age_limit': ('is_mature', T(lambda x: 18 if x else None)),
+            'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
+            'release_timestamp': ('created_at', T(parse_iso8601)),
+        }, get_all=False), traverse_obj(data, ('media', 0, 'metadata', {
+            'title': ('title', T(txt_or_none)),
+            'description': ('description', T(self.get_description)),
+            'duration': ('duration', T(float_or_none)),
+            'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
+            'release_timestamp': ('created_at', T(parse_iso8601)),
+        })), {
             'id': video_id,
             'formats': formats,
-            'title': self._og_search_title(webpage, default=video_id),
-        }
+            'title': self._og_search_title(webpage, default='Imgur video ' + video_id),
+            'description': self.get_description(self._og_search_description(webpage)),
+            'thumbnail': url_or_none(self._html_search_meta('thumbnailUrl', webpage, default=None)),
+        })
 
 
-class ImgurGalleryIE(InfoExtractor):
+class ImgurGalleryBaseIE(ImgurBaseIE):
+    _GALLERY = True
+
+    def _real_extract(self, url):
+        gallery_id = self._match_id(url)
+
+        data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
+
+        info = traverse_obj(data, {
+            'title': ('title', T(txt_or_none)),
+            'description': ('description', T(self.get_description)),
+        })
+
+        if traverse_obj(data, 'is_album'):
+
+            def yield_media_ids():
+                for m_id in traverse_obj(data, (
+                        'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
+                        'id', T(txt_or_none))):
+                    yield m_id
+
+            # if a gallery with exactly one video, apply album metadata to video
+            media_id = (
+                self._GALLERY
+                and traverse_obj(data, ('image_count', T(lambda c: c == 1)))
+                and next(yield_media_ids(), None))
+
+            if not media_id:
+                result = self.playlist_result(
+                    map(self._imgur_result, yield_media_ids()), gallery_id)
+                result.update(info)
+                return result
+            gallery_id = media_id
+
+        result = self._imgur_result(gallery_id)
+        info['_type'] = 'url_transparent'
+        result.update(info)
+        return result
+
+
+class ImgurGalleryIE(ImgurGalleryBaseIE):
     IE_NAME = 'imgur:gallery'
     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
 
@@ -106,49 +272,93 @@ class ImgurGalleryIE(InfoExtractor):
             'title': 'Adding faces make every GIF better',
         },
         'playlist_count': 25,
+        'skip': 'Zoinks! You\'ve taken a wrong turn.',
     }, {
+        # TODO: static images - replace with animated/video gallery
         'url': 'http://imgur.com/topic/Aww/ll5Vk',
         'only_matching': True,
     }, {
         'url': 'https://imgur.com/gallery/YcAQlkx',
+        'add_ies': ['Imgur'],
         'info_dict': {
             'id': 'YcAQlkx',
             'ext': 'mp4',
             'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
-        }
+            'timestamp': 1358554297,
+            'upload_date': '20130119',
+            'uploader_id': '1648642',
+            'uploader': 'wittyusernamehere',
+        },
     }, {
+        # TODO: static image - replace with animated/video gallery
         'url': 'http://imgur.com/topic/Funny/N8rOudd',
         'only_matching': True,
     }, {
         'url': 'http://imgur.com/r/aww/VQcQPhM',
-        'only_matching': True,
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'VQcQPhM',
+            'ext': 'mp4',
+            'title': 'The boss is here',
+            'timestamp': 1476494751,
+            'upload_date': '20161015',
+            'uploader_id': '19138530',
+            'uploader': 'thematrixcam',
+        },
+    },
+        # from PR #16674
+        {
+        'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
+        'info_dict': {
+            'id': '6lAn9VQ',
+            'title': 'Penguins !',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://imgur.com/t/unmuted/kx2uD3C',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'ZVMv45i',
+            'ext': 'mp4',
+            'title': 'Intruder',
+            'timestamp': 1528129683,
+            'upload_date': '20180604',
+        },
+    }, {
+        'url': 'https://imgur.com/t/unmuted/wXSK0YH',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'JCAP4io',
+            'ext': 'mp4',
+            'title': 're:I got the blues$',
+            'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
+            'timestamp': 1527809525,
+            'upload_date': '20180531',
+        },
     }]
 
-    def _real_extract(self, url):
-        gallery_id = self._match_id(url)
 
-        data = self._download_json(
-            'https://imgur.com/gallery/%s.json' % gallery_id,
-            gallery_id)['data']['image']
-
-        if data.get('is_album'):
-            entries = [
-                self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
-                for image in data['album_images']['images'] if image.get('hash')]
-            return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
-
-        return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
-
-
-class ImgurAlbumIE(ImgurGalleryIE):
+class ImgurAlbumIE(ImgurGalleryBaseIE):
     IE_NAME = 'imgur:album'
     _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
-
+    _GALLERY = False
     _TESTS = [{
+        # TODO: only static images - replace with animated/video gallery
         'url': 'http://imgur.com/a/j6Orj',
+        'only_matching': True,
+    },
+        # from PR #21693
+        {
+        'url': 'https://imgur.com/a/iX265HX',
         'info_dict': {
-            'id': 'j6Orj',
-            'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+            'id': 'iX265HX',
+            'title': 'enen-no-shouboutai'
         },
-        'playlist_count': 12,
+        'playlist_count': 2,
+    }, {
+        'url': 'https://imgur.com/a/8pih2Ed',
+        'info_dict': {
+            'id': '8pih2Ed'
+        },
+        'playlist_mincount': 1,
     }]

From b1bbc1e50277e240419eb1308e444ac8a5da4320 Mon Sep 17 00:00:00 2001
From: Robotix <82544307+realRobotix@users.noreply.github.com>
Date: Wed, 6 Dec 2023 02:17:57 +0100
Subject: [PATCH 144/156] [Epidemic Sound] Add new extractor (#32628)

* Add simple extractor
* Support separate tracks
* Use index as id instead of slug

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/epidemicsound.py | 101 ++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py    |   1 +
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/epidemicsound.py

diff --git a/youtube_dl/extractor/epidemicsound.py b/youtube_dl/extractor/epidemicsound.py
new file mode 100644
index 000000000..1a52738aa
--- /dev/null
+++ b/youtube_dl/extractor/epidemicsound.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    T,
+    traverse_obj,
+    txt_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class EpidemicSoundIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
+        'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
+        'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
+        'info_dict': {
+            'id': '45014',
+            'display_id': 'yFfQVRpSPz',
+            'ext': 'mp3',
+            'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
+            'title': 'Door Knock Door 1',
+            'duration': 1,
+            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
+            'timestamp': 1415320353,
+            'upload_date': '20141107',
+            'age_limit': None,
+            # check that the "best" format was found, since test file MD5 doesn't
+            # distinguish the formats
+            'format': 'full',
+        },
+    }, {
+        'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
+        'md5': 'c82b745890f9baf18dc2f8d568ee3830',
+        'info_dict': {
+            'id': '148700',
+            'display_id': 'mj8GTTwsZd',
+            'ext': 'mp3',
+            'tags': ['liquid drum n bass', 'energetic'],
+            'title': 'Noplace',
+            'duration': 237,
+            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
+            'timestamp': 1694426482,
+            'release_timestamp': 1700535606,
+            'upload_date': '20230911',
+            'age_limit': None,
+            'format': 'full',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json('https://www.epidemicsound.com/json/track/' + video_id, video_id)
+
+        def fmt_or_none(f):
+            if not f.get('format'):
+                f['format'] = f.get('format_id')
+            elif not f.get('format_id'):
+                f['format_id'] = f['format']
+            if not (f['url'] and f['format']):
+                return
+            if f.get('format_note'):
+                f['format_note'] = 'track ID ' + f['format_note']
+            f['preference'] = -1 if f['format'] == 'full' else -2
+            return f
+
+        formats = traverse_obj(json_data, (
+            'stems', T(dict.items), Ellipsis, {
+                'format': (0, T(txt_or_none)),
+                'format_note': (1, 's3TrackId', T(txt_or_none)),
+                'format_id': (1, 'stemType', T(txt_or_none)),
+                'url': (1, 'lqMp3Url', T(url_or_none)),
+            }, T(fmt_or_none)))
+
+        self._sort_formats(formats)
+
+        info = traverse_obj(json_data, {
+            'id': ('id', T(txt_or_none)),
+            'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
+            'title': ('title', T(txt_or_none)),
+            'duration': ('length', T(float_or_none)),
+            'timestamp': ('added', T(unified_timestamp)),
+            'thumbnail': (('imageUrl', 'cover'), T(url_or_none)),
+            'age_limit': ('isExplicit', T(lambda b: 18 if b else None)),
+            'release_timestamp': ('releaseDate', T(unified_timestamp)),
+        }, get_all=False)
+
+        info.update(traverse_obj(json_data, {
+            'categories': ('genres', Ellipsis, 'tag', T(txt_or_none)),
+            'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
+        }))
+
+        info.update({
+            'display_id': video_id,
+            'formats': formats,
+        })
+
+        return info
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d9289e5bf..82221445f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -357,6 +357,7 @@ from .ellentube import (
 from .elpais import ElPaisIE
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
+from .epidemicsound import EpidemicSoundIE
 from .eporner import EpornerIE
 from .eroprofile import EroProfileIE
 from .escapist import EscapistIE

From be008e657d79832642e2158557c899249c9e31cd Mon Sep 17 00:00:00 2001
From: mk-pmb <mk-pmb@users.noreply.github.com>
Date: Wed, 13 Sep 2023 20:57:05 +0200
Subject: [PATCH 145/156] [core] Fix format string injection for metadata JSON
 filename message.

---
 youtube_dl/YoutubeDL.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 13a41928f..6f2aba5ac 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2635,12 +2635,12 @@ class YoutubeDL(object):
             self.to_screen(msg('[info] %s is already present', label.title()))
             return 'exists'
         else:
-            self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label))
+            self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
             try:
                 write_json_file(self.filter_requested_info(info_dict), infofn)
                 return True
             except (OSError, IOError):
-                self.report_error(msg('Cannot write %s to JSON file ' + infofn, label))
+                self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
                 return
 
     def _write_thumbnails(self, info_dict, filename):

From 66518714169185195a359e173cef73fba31d76b8 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 20 Jan 2024 18:28:52 +0000
Subject: [PATCH 146/156] [compat] Rework compat for `method` parameter of
 `compat_urllib_request.Request` constructor * fixes #32573 * does not break
 `utils.HEADrequest` (for example)

---
 test/test_compat.py  | 14 ++++++++++++++
 youtube_dl/compat.py | 27 +++++++++++++++++----------
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index e233b1ae1..b83c8cb41 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -23,6 +23,7 @@ from youtube_dl.compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlencode,
+    compat_urllib_request,
 )
 
 
@@ -135,6 +136,19 @@ class TestCompat(unittest.TestCase):
         self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
         self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
 
+    def test_compat_urllib_request_Request(self):
+        self.assertEqual(
+            compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(),
+            'PUT')
+
+        class PUTrequest(compat_urllib_request.Request):
+            def get_method(self):
+                return 'PUT'
+
+        self.assertEqual(
+            PUTrequest('http://127.0.0.1').get_method(),
+            'PUT')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 3c526a78d..818ccebd0 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -58,19 +58,26 @@ except ImportError:  # Python 2
 
 # Also fix up lack of method arg in old Pythons
 try:
-    _req = compat_urllib_request.Request
-    _req('http://127.0.0.1', method='GET')
+    type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
 except TypeError:
-    class _request(object):
-        def __new__(cls, url, *args, **kwargs):
-            method = kwargs.pop('method', None)
-            r = _req(url, *args, **kwargs)
-            if method:
-                r.get_method = types.MethodType(lambda _: method, r)
-            return r
+    def _add_init_method_arg(cls):
 
-    compat_urllib_request.Request = _request
+        init = cls.__init__
 
+        def wrapped_init(self, *args, **kwargs):
+            method = kwargs.pop('method', 'GET')
+            init(self, *args, **kwargs)
+            if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
+                # allow instance or its subclass to override get_method()
+                return
+            if self.has_data() and method == 'GET':
+                method = 'POST'
+            self.get_method = types.MethodType(lambda _: method, self)
+
+        cls.__init__ = wrapped_init
+
+    _add_init_method_arg(compat_urllib_request.Request)
+    del _add_init_method_arg
 
 try:
     import urllib.error as compat_urllib_error

From 640d39f03ae80a0b8d0605a711d97c10f6edbd3f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:32:06 +0000
Subject: [PATCH 147/156] [InfoExtractor] Support some warning and
 `._downloader` shortcut methods from yt-dlp

---
 youtube_dl/extractor/common.py | 56 ++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0eca9f844..d33557135 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -596,6 +596,14 @@ class InfoExtractor(object):
         """Sets the downloader for this IE."""
         self._downloader = downloader
 
+    @property
+    def cache(self):
+        return self._downloader.cache
+
+    @property
+    def cookiejar(self):
+        return self._downloader.cookiejar
+
     def _real_initialize(self):
         """Real initialization process. Redefine in subclasses."""
         pass
@@ -942,14 +950,47 @@ class InfoExtractor(object):
             else:
                 self.report_warning(errmsg + str(ve))
 
-    def report_warning(self, msg, video_id=None):
+    def __ie_msg(self, *msg):
+        return '[{0}] {1}'.format(self.IE_NAME, ''.join(msg))
+
+    # msg, video_id=None, *args, only_once=False, **kwargs
+    def report_warning(self, msg, *args, **kwargs):
+        if len(args) > 0:
+            video_id = args[0]
+            args = args[1:]
+        else:
+            video_id = kwargs.pop('video_id', None)
         idstr = '' if video_id is None else '%s: ' % video_id
         self._downloader.report_warning(
-            '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+            self.__ie_msg(idstr, msg), *args, **kwargs)
 
     def to_screen(self, msg):
         """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+        self._downloader.to_screen(self.__ie_msg(msg))
+
+    def write_debug(self, msg, only_once=False, _cache=[]):
+        '''Log debug message or Print message to stderr'''
+        if not self.get_param('verbose', False):
+            return
+        message = '[debug] ' + self.__ie_msg(msg)
+        logger = self.get_param('logger')
+        if logger:
+            logger.debug(message)
+        else:
+            if only_once and hash(message) in _cache:
+                return
+            self._downloader.to_stderr(message)
+            _cache.append(hash(message))
+
+    # name, default=None, *args, **kwargs
+    def get_param(self, name, *args, **kwargs):
+        default, args = (args[0], args[1:]) if len(args) > 0 else (kwargs.pop('default', None), args)
+        if self._downloader:
+            return self._downloader.params.get(name, default, *args, **kwargs)
+        return default
+
+    def report_drm(self, video_id):
+        self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
 
     def report_extraction(self, id_or_name):
         """Report information extraction."""
@@ -977,6 +1018,15 @@ class InfoExtractor(object):
     def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
         raise GeoRestrictedError(msg, countries=countries)
 
+    def raise_no_formats(self, msg, expected=False, video_id=None):
+        if expected and (
+                self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
+            self.report_warning(msg, video_id)
+        elif isinstance(msg, ExtractorError):
+            raise msg
+        else:
+            raise ExtractorError(msg, expected=expected, video_id=video_id)
+
     # Methods for following #608
     @staticmethod
     def url_result(url, ie=None, video_id=None, video_title=None):

From f8b0135850f39609f72002f5426883859579fc51 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:34:21 +0000
Subject: [PATCH 148/156] [YouTube] Rework n-sig processing, realigning with
 yt-dlp * apply n-sig before chunked fragments, fixes #32692

---
 youtube_dl/extractor/youtube.py | 488 +++++++++++++++++++-------------
 1 file changed, 296 insertions(+), 192 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3bf483c1c..cd4b3ef60 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2,6 +2,7 @@
 
 from __future__ import unicode_literals
 
+import collections
 import itertools
 import json
 import os.path
@@ -23,10 +24,10 @@ from ..compat import (
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
-    ExtractorError,
     clean_html,
     dict_get,
     error_to_compat_str,
+    ExtractorError,
     float_or_none,
     extract_attributes,
     get_element_by_attribute,
@@ -36,6 +37,7 @@ from ..utils import (
     LazyList,
     merge_dicts,
     mimetype2ext,
+    NO_DEFAULT,
     parse_codecs,
     parse_duration,
     parse_qs,
@@ -45,6 +47,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     traverse_obj,
+    try_call,
     try_get,
     txt_or_none,
     unescapeHTML,
@@ -1460,6 +1463,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         self._code_cache = {}
         self._player_cache = {}
 
+    # *ytcfgs, webpage=None
+    def _extract_player_url(self, *ytcfgs, **kw_webpage):
+        """Return the player JS URL found in `webpage` (keyword, or a non-dict first arg) or the ytcfgs"""
+        webpage = kw_webpage.get('webpage') or (
+            ytcfgs[0] if ytcfgs and not isinstance(ytcfgs[0], dict) else None)
+        if webpage:
+            player_url = self._search_regex(
+                r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"', webpage, 'player URL', fatal=False)
+            if player_url:
+                ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
+        return traverse_obj(
+            ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
+            get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
+
+    def _download_player_url(self, video_id, fatal=False):
+        res = self._download_webpage(
+            'https://www.youtube.com/iframe_api',
+            note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
+        player_version = self._search_regex(
+            r'player\\?/([0-9a-fA-F]{8})\\?/', res or '', 'player version', fatal=fatal,
+            default=NO_DEFAULT if res else None)
+        if player_version:
+            return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version)
+
     def _signature_cache_id(self, example_sig):
         """ Return a string representation of a signature """
         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@@ -1474,46 +1501,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot identify player %r' % player_url)
         return id_m.group('id')
 
-    def _get_player_code(self, video_id, player_url, player_id=None):
+    def _load_player(self, video_id, player_url, fatal=True, player_id=None):
         if not player_id:
             player_id = self._extract_player_info(player_url)
-
         if player_id not in self._code_cache:
-            self._code_cache[player_id] = self._download_webpage(
-                player_url, video_id,
+            code = self._download_webpage(
+                player_url, video_id, fatal=fatal,
                 note='Downloading player ' + player_id,
                 errnote='Download of %s failed' % player_url)
-        return self._code_cache[player_id]
+            if code:
+                self._code_cache[player_id] = code
+        return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         player_id = self._extract_player_info(player_url)
 
         # Read from filesystem cache
-        func_id = 'js_%s_%s' % (
+        func_id = 'js_{0}_{1}'.format(
             player_id, self._signature_cache_id(example_sig))
         assert os.path.basename(func_id) == func_id
 
-        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
-        if cache_spec is not None:
-            return lambda s: ''.join(s[i] for i in cache_spec)
+        self.write_debug('Extracting signature function {0}'.format(func_id))
+        cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
 
-        code = self._get_player_code(video_id, player_url, player_id)
-        res = self._parse_sig_js(code)
+        if not cache_spec:
+            code = self._load_player(video_id, player_url, player_id=player_id)  # 3rd positional is `fatal`
+        if code:
+            res = self._parse_sig_js(code)
+            test_string = ''.join(map(compat_chr, range(len(example_sig))))
+            cache_spec = [ord(c) for c in res(test_string)]
+            self.cache.store('youtube-sigfuncs', func_id, cache_spec)
 
-        test_string = ''.join(map(compat_chr, range(len(example_sig))))
-        cache_res = res(test_string)
-        cache_spec = [ord(c) for c in cache_res]
-
-        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
-        return res
+        return lambda s: ''.join(s[i] for i in cache_spec)
 
     def _print_sig_code(self, func, example_sig):
+        if not self.get_param('youtube_print_sig_code'):
+            return
+
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = '' if start == 0 else str(start)
                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                 steps = '' if step == 1 else (':%d' % step)
-                return 's[%s%s%s]' % (starts, ends, steps)
+                return 's[{0}{1}{2}]'.format(starts, ends, steps)
 
             step = None
             # Quelch pyflakes warnings - start will be set when step is set
@@ -1564,143 +1594,137 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
-
         initial_function = jsi.extract_function(funcname)
-
         return lambda s: initial_function([s])
 
+    def _cached(self, func, *cache_id):
+        def inner(*args, **kwargs):
+            if cache_id not in self._player_cache:
+                try:
+                    self._player_cache[cache_id] = func(*args, **kwargs)
+                except ExtractorError as e:
+                    self._player_cache[cache_id] = e
+                except Exception as e:
+                    self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
+
+            ret = self._player_cache[cache_id]
+            if isinstance(ret, Exception):
+                raise ret
+            return ret
+        return inner
+
     def _decrypt_signature(self, s, video_id, player_url):
         """Turn the encrypted s field into a working signature"""
-
-        if player_url is None:
-            raise ExtractorError('Cannot decrypt signature without player_url')
-
-        try:
-            player_id = (player_url, self._signature_cache_id(s))
-            if player_id not in self._player_cache:
-                func = self._extract_signature_function(
-                    video_id, player_url, s
-                )
-                self._player_cache[player_id] = func
-            func = self._player_cache[player_id]
-            if self._downloader.params.get('youtube_print_sig_code'):
-                self._print_sig_code(func, s)
-            return func(s)
-        except Exception as e:
-            tb = traceback.format_exc()
-            raise ExtractorError(
-                'Signature extraction failed: ' + tb, cause=e)
-
-    def _extract_player_url(self, webpage):
-        player_url = self._search_regex(
-            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
-            webpage or '', 'player URL', fatal=False)
-        if not player_url:
-            return
-        if player_url.startswith('//'):
-            player_url = 'https:' + player_url
-        elif not re.match(r'https?://', player_url):
-            player_url = compat_urllib_parse.urljoin(
-                'https://www.youtube.com', player_url)
-        return player_url
+        extract_sig = self._cached(
+            self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
+        func = extract_sig(video_id, player_url, s)
+        self._print_sig_code(func, s)
+        return func(s)
 
     # from yt-dlp
     # See also:
     # 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
     # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
     # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
-    def _extract_n_function_name(self, jscode):
-        target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
-        nfunc_and_idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
-            jscode, 'Initial JS player n function name')
-        nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
-        if not idx:
-            return nfunc
-
-        VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
-        note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
-
-        def search_function_code(needle, group):
-            return self._search_regex(
-                VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
-                note.format(group), group=group)
-
-        if int_or_none(idx) == 0:
-            real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
-            if real_nfunc:
-                return real_nfunc
-        return self._parse_json(
-            search_function_code('.+?', group='name'),
-            nfunc, transform_source=js_to_json)[int(idx)]
-
-    def _extract_n_function(self, video_id, player_url):
-        player_id = self._extract_player_info(player_url)
-        func_code = self._downloader.cache.load('youtube-nsig', player_id)
-
-        if func_code:
-            jsi = JSInterpreter(func_code)
-        else:
-            jscode = self._get_player_code(video_id, player_url, player_id)
-            funcname = self._extract_n_function_name(jscode)
-            jsi = JSInterpreter(jscode)
-            func_code = jsi.extract_function_code(funcname)
-            self._downloader.cache.store('youtube-nsig', player_id, func_code)
-
-        if self._downloader.params.get('youtube_print_sig_code'):
-            self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))
-
-        return lambda s: jsi.extract_function_from_code(*func_code)([s])
-
-    def _n_descramble(self, n_param, player_url, video_id):
-        """Compute the response to YT's "n" parameter challenge,
-           or None
-
-        Args:
-        n_param     -- challenge string that is the value of the
-                       URL's "n" query parameter
-        player_url  -- URL of YT player JS
-        video_id
-        """
-
-        sig_id = ('nsig_value', n_param)
-        if sig_id in self._player_cache:
-            return self._player_cache[sig_id]
+    def _decrypt_nsig(self, n, video_id, player_url):
+        """Return the descrambled value of the n parameter, or None if the JS function fails"""
+        if player_url is None:
+            raise ExtractorError('Cannot decrypt nsig without player_url')
 
         try:
-            player_id = ('nsig', player_url)
-            if player_id not in self._player_cache:
-                self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
-            func = self._player_cache[player_id]
-            ret = func(n_param)
-            if ret.startswith('enhanced_except_'):
-                raise ExtractorError('Unhandled exception in decode')
-            self._player_cache[sig_id] = ret
-            if self._downloader.params.get('verbose', False):
-                self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
-            return self._player_cache[sig_id]
-        except Exception as e:
-            self._downloader.report_warning(
-                '[%s] %s (%s %s)' % (
-                    self.IE_NAME,
-                    'Unable to decode n-parameter: download likely to be throttled',
+            jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as e:
+            raise ExtractorError('Unable to extract nsig function code', cause=e)
+        if self.get_param('youtube_print_sig_code'):
+            self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
+                player_id, func_code[1]))
+
+        try:
+            extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+            ret = extract_nsig(jsi, func_code)(n)
+        except JSInterpreter.Exception as e:
+            self.report_warning(
+                '%s (%s %s)' % (
+                    # report_warning() adds the prefix; self.__ie_msg is name-mangled here
+                    'Unable to decode n-parameter: download likely to be throttled',
                     error_to_compat_str(e),
                     traceback.format_exc()))
+            return
+
+        self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
+        return ret
+
+    def _extract_n_function_name(self, jscode):
+        func_name, idx = self._search_regex(
+            r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
+            jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+        if not idx:
+            return func_name
+
+        return self._parse_json(self._search_regex(
+            r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
+            'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
+            func_name, transform_source=js_to_json)[int(idx)]
+
+    def _extract_n_function_code(self, video_id, player_url):
+        """Return (jsi, player_id, func_code), taking func_code from the 'youtube-nsig' cache if present"""
+        player_id = self._extract_player_info(player_url)
+        func_code = self.cache.load('youtube-nsig', player_id)
+        jscode = func_code or self._load_player(video_id, player_url)
+        jsi = JSInterpreter(jscode)
+
+        if func_code:
+            return jsi, player_id, func_code
+
+        func_name = self._extract_n_function_name(jscode)
+        # For redundancy
+        func_code = self._search_regex(
+            r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+                     # NB: The end of the regex is intentionally kept strict
+                     {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
+            jscode, 'nsig function', group=('var', 'code'), default=None)
+        if func_code:
+            func_code = ([func_code[0]], func_code[1])
+        else:
+            self.write_debug('Extracting nsig function with jsinterp')
+            func_code = jsi.extract_function_code(func_name)
+
+        self.cache.store('youtube-nsig', player_id, func_code)
+        return jsi, player_id, func_code
+
+    def _extract_n_function_from_code(self, jsi, func_code):
+        func = jsi.extract_function_from_code(*func_code)
+
+        def extract_nsig(s):
+            try:
+                ret = func([s])
+            except JSInterpreter.Exception:
+                raise
+            except Exception as e:
+                raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+            if ret.startswith('enhanced_except_'):
+                raise JSInterpreter.Exception('Signature function returned an exception')
+            return ret
+
+        return extract_nsig
+
+    def _unthrottle_format_urls(self, video_id, player_url, *formats):
+
+        def decrypt_nsig(n):
+            return self._cached(self._decrypt_nsig, 'nsig', n, player_url)
 
-    def _unthrottle_format_urls(self, video_id, player_url, formats):
         for fmt in formats:
             parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
             n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
             if not n_param:
                 continue
             n_param = n_param[-1]
-            n_response = self._n_descramble(n_param, player_url, video_id)
+            n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
             if n_response is None:
                 # give up if descrambling failed
                 break
-            for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
-                fmt_dct['url'] = update_url(
-                    fmt_dct['url'], query_update={'n': [n_response]})
+            fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -1708,16 +1732,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         Extract signatureTimestamp (sts)
         Required to tell API what sig/player version is in use.
         """
-        sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
+        sts = traverse_obj(ytcfg, 'STS', expected_type=int)
         if not sts:
             # Attempt to extract from player
             if player_url is None:
                 error_msg = 'Cannot extract signature timestamp without player_url.'
                 if fatal:
                     raise ExtractorError(error_msg)
-                self._downloader.report_warning(error_msg)
+                self.report_warning(error_msg)
                 return
-            code = self._get_player_code(video_id, player_url)
+            code = self._load_player(video_id, player_url, fatal=fatal)
             sts = int_or_none(self._search_regex(
                 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
                 'JS player signature timestamp', group='sts', fatal=fatal))
@@ -1733,12 +1757,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # cpn generation algorithm is reverse engineered from base.js.
         # In fact it works even with dummy cpn.
         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
-        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
 
-        playback_url = update_url(
-            playback_url, query_update={
-                'ver': ['2'],
-                'cpn': [cpn],
+        # more consistent results setting it to right before the end
+        qs = parse_qs(playback_url)
+        video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1)
+
+        playback_url = update_url_query(
+            playback_url, {
+                'ver': '2',
+                'cpn': cpn,
+                'cmt': video_length,
+                'el': 'detailpage',  # otherwise defaults to "shorts"
             })
 
         self._download_webpage(
@@ -1986,8 +2016,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             else:
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
+        if not player_url:
+            player_url = self._extract_player_url(webpage)
+
         formats = []
-        itags = []
+        itags = collections.defaultdict(set)
         itag_qualities = {}
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
         CHUNK_SIZE = 10 << 20
@@ -2003,58 +2036,92 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 })
             } for range_start in range(0, f['filesize'], CHUNK_SIZE))
 
+        lower = lambda s: s.lower()
+
         for fmt in streaming_formats:
-            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+            if fmt.get('targetDurationSec'):
                 continue
 
             itag = str_or_none(fmt.get('itag'))
-            quality = fmt.get('quality')
-            if itag and quality:
+            audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}
+
+            quality = traverse_obj(fmt, ((
+                # The 3gp format (17) in android client has a quality of "small",
+                # but is actually worse than other formats
+                T(lambda _: 'tiny' if itag == 17 else None),
+                ('quality', T(lambda q: q if q and q != 'tiny' else None)),
+                ('audioQuality', T(lower)),
+                'quality'), T(txt_or_none)), get_all=False)
+            if quality and itag:
                 itag_qualities[itag] = quality
             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
-            # number of fragment that would subsequently requested with (`&sq=N`)
+            # number of fragments that would subsequently be requested with (`&sq=N`)
             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                 continue
 
             fmt_url = fmt.get('url')
             if not fmt_url:
                 sc = compat_parse_qs(fmt.get('signatureCipher'))
-                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
-                encrypted_sig = try_get(sc, lambda x: x['s'][0])
-                if not (sc and fmt_url and encrypted_sig):
+                fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
+                encrypted_sig = traverse_obj(sc, ('s', -1))
+                if not (fmt_url and encrypted_sig):
                     continue
-                if not player_url:
-                    player_url = self._extract_player_url(webpage)
+                player_url = player_url or self._extract_player_url(webpage)
                 if not player_url:
                     continue
-                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
-                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
-                fmt_url += '&' + sp + '=' + signature
+                try:
+                    fmt_url = update_url_query(fmt_url, {
+                        traverse_obj(sc, ('sp', -1)) or 'signature':
+                            [self._decrypt_signature(encrypted_sig, video_id, player_url)],
+                    })
+                except ExtractorError as e:
+                    self.report_warning('Signature extraction failed: Some formats may be missing',
+                                        video_id=video_id, only_once=True)
+                    self.write_debug(error_to_compat_str(e), only_once=True)
+                    continue
 
-            if itag:
-                itags.append(itag)
-            tbr = float_or_none(
-                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+            language_preference = (
+                10 if audio_track.get('audioIsDefault')
+                else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
+                else -1)
+            name = (
+                traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
+                or quality.replace('audio_quality_', ''))
             dct = {
-                'asr': int_or_none(fmt.get('audioSampleRate')),
-                'filesize': int_or_none(fmt.get('contentLength')),
-                'format_id': itag,
-                'format_note': fmt.get('qualityLabel') or quality,
-                'fps': int_or_none(fmt.get('fps')),
-                'height': int_or_none(fmt.get('height')),
-                'quality': q(quality),
-                'tbr': tbr,
+                'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
                 'url': fmt_url,
-                'width': fmt.get('width'),
+                # Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
+                'source_preference': ((-5 if itag == '22' else -1)
+                                      + (100 if 'Premium' in name else 0)),
+                'quality': q(quality),
+                'language': join_nonempty(audio_track.get('id', '').split('.')[0],
+                                          'desc' if language_preference < -1 else '') or None,
+                'language_preference': language_preference,
+                # Strictly de-prioritize 3gp formats
+                'preference': -2 if itag == '17' else None,
             }
-            mimetype = fmt.get('mimeType')
-            if mimetype:
-                mobj = re.match(
-                    r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
-                if mobj:
-                    dct['ext'] = mimetype2ext(mobj.group(1))
-                    dct.update(parse_codecs(mobj.group(2)))
+            if itag:
+                itags[itag].add(('https', dct.get('language')))
+            self._unthrottle_format_urls(video_id, player_url, dct)
+            dct.update(traverse_obj(fmt, {
+                'asr': ('audioSampleRate', T(int_or_none)),
+                'filesize': ('contentLength', T(int_or_none)),
+                'format_note': ('qualityLabel', T(lambda x: x or quality)),
+                # for some formats, fps is wrongly returned as 1
+                'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
+                'audio_channels': ('audioChannels', T(int_or_none)),
+                'height': ('height', T(int_or_none)),
+                'has_drm': ('drmFamilies', T(bool)),
+                'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
+                'width': ('width', T(int_or_none)),
+                '_duration_ms': ('approxDurationMs', T(int_or_none)),
+            }, get_all=False))
+            mime_mobj = re.match(
+                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
+            if mime_mobj:
+                dct['ext'] = mimetype2ext(mime_mobj.group(1))
+                dct.update(parse_codecs(mime_mobj.group(2)))
             single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
             if single_stream and dct.get('ext'):
                 dct['container'] = dct['ext'] + '_dash'
@@ -2069,32 +2136,62 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
             formats.append(dct)
 
+        def process_manifest_format(f, proto, client_name, itag, all_formats=False):
+            key = (proto, f.get('language'))
+            if not all_formats and key in itags[itag]:
+                return False
+            itags[itag].add(key)
+
+            if itag:
+                f['format_id'] = (
+                    '{0}-{1}'.format(itag, proto)
+                    if all_formats or any(p != proto for p, _ in itags[itag])
+                    else itag)
+
+            if f.get('source_preference') is None:
+                f['source_preference'] = -1
+
+            if itag in ('616', '235'):
+                f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+                f['source_preference'] += 100
+
+            f['quality'] = q(traverse_obj(f, (
+                'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
+            if try_call(lambda: f['fps'] <= 1):
+                del f['fps']
+
+            if proto == 'hls' and f.get('has_drm'):
+                f['has_drm'] = 'maybe'
+                f['source_preference'] -= 5
+            return True
+
         hls_manifest_url = streaming_data.get('hlsManifestUrl')
         if hls_manifest_url:
             for f in self._extract_m3u8_formats(
                     hls_manifest_url, video_id, 'mp4', fatal=False):
-                itag = self._search_regex(
-                    r'/itag/(\d+)', f['url'], 'itag', default=None)
-                if itag:
-                    f['format_id'] = itag
-                formats.append(f)
+                if process_manifest_format(
+                        f, 'hls', None, self._search_regex(
+                            r'/itag/(\d+)', f['url'], 'itag', default=None)):
+                    formats.append(f)
 
         if self._downloader.params.get('youtube_include_dash_manifest', True):
             dash_manifest_url = streaming_data.get('dashManifestUrl')
             if dash_manifest_url:
                 for f in self._extract_mpd_formats(
                         dash_manifest_url, video_id, fatal=False):
-                    itag = f['format_id']
-                    if itag in itags:
-                        continue
-                    if itag in itag_qualities:
-                        f['quality'] = q(itag_qualities[itag])
-                    filesize = int_or_none(self._search_regex(
-                        r'/clen/(\d+)', f.get('fragment_base_url')
-                        or f['url'], 'file size', default=None))
-                    if filesize:
-                        f['filesize'] = filesize
-                    formats.append(f)
+                    if process_manifest_format(
+                            f, 'dash', None, f['format_id']):
+                        f['filesize'] = traverse_obj(f, (
+                            ('fragment_base_url', 'url'), T(lambda u: self._search_regex(
+                                r'/clen/(\d+)', u, 'file size', default=None)),
+                            T(int_or_none)), get_all=False)
+                        formats.append(f)
+
+        playable_formats = [f for f in formats if not f.get('has_drm')]
+        if formats and not playable_formats:
+            # If there are no formats that definitely don't have DRM, all have DRM
+            self.report_drm(video_id)
+        formats[:] = playable_formats
 
         if not formats:
             if streaming_data.get('licenseInfos'):
@@ -2166,6 +2263,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_details.get('lengthSeconds')
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
+
+        for f in formats:
+            # Some formats may have much smaller duration than others (possibly damaged during encoding)
+            # but avoid false positives with small duration differences.
+            # Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+            if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
+                self.report_warning(
+                    '{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
+                # Strictly de-prioritize damaged formats
+                f['preference'] = -10
+
         is_live = video_details.get('isLive')
 
         owner_profile_url = self._yt_urljoin(self._extract_author_var(
@@ -2174,10 +2282,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         uploader = self._extract_author_var(
             webpage, 'name', videodetails=video_details, metadata=microformat)
 
-        if not player_url:
-            player_url = self._extract_player_url(webpage)
-        self._unthrottle_format_urls(video_id, player_url, formats)
-
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,

From dc512e3a8a26a8e3fc7f1f67e5ee5e7699db8659 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:38:43 +0000
Subject: [PATCH 149/156] [YouTube] Fix `like_count` extraction using
 `likeButtonViewModel` * also fix various tests * TODO: check against yt-dlp
 tests

---
 test/test_subtitles.py          |  1 +
 youtube_dl/extractor/youtube.py | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 1197721ff..e005c78fc 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -295,6 +295,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
+        self.DL.params['format'] = 'best/bestvideo'
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
         self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index cd4b3ef60..db840fc45 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -39,6 +39,7 @@ from ..utils import (
     mimetype2ext,
     NO_DEFAULT,
     parse_codecs,
+    parse_count,
     parse_duration,
     parse_qs,
     qualities,
@@ -46,6 +47,7 @@ from ..utils import (
     smuggle_url,
     str_or_none,
     str_to_int,
+    T,
     traverse_obj,
     try_call,
     try_get,
@@ -1250,7 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'IMG 3456',
                 'description': '',
                 'upload_date': '20170613',
-                'uploader': 'ElevageOrVert',
+                'uploader': "l'Or Vert asbl",
                 'uploader_id': '@ElevageOrVert',
             },
             'params': {
@@ -2474,6 +2476,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             'like_count': str_to_int(like_count),
                             'dislike_count': str_to_int(dislike_count),
                         })
+                    else:
+                        info['like_count'] = traverse_obj(vpir, (
+                            'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
+                            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
+                            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
+                            'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
+                            get_all=False)
+
                 vsir = content.get('videoSecondaryInfoRenderer')
                 if vsir:
                     rows = try_get(
@@ -2588,7 +2598,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Igor Kleiner - Playlists',
+            'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
             'uploader': 'Igor Kleiner',
             'uploader_id': '@IgorDataScience',
@@ -2599,7 +2609,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Igor Kleiner - Playlists',
+            'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
             'uploader': 'Igor Kleiner',
             'uploader_id': '@IgorDataScience',
@@ -2711,7 +2721,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
         'info_dict': {
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'title': 'lex will - Channels',
+            'title': r're:lex will - (?:Home|Channels)',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
             'uploader_id': '@lexwill718',

From c58b655a9ef255eb9d02b4d57706c46cfdf35975 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:29:25 +0000
Subject: [PATCH 150/156] [InfoExtractor] Support DASH subtitle extraction
 (yt-dlp back-port)

---
 test/test_InfoExtractor.py     | 113 +++++++++++-
 youtube_dl/extractor/common.py | 312 ++++++++++++++++++++-------------
 2 files changed, 300 insertions(+), 125 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 3f96645de..043b62243 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -993,7 +993,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 5997.485,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/pull/14844
                 'urls_only',
@@ -1076,7 +1077,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 4400,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/issues/20346
                 # Media considered unfragmented even though it contains
@@ -1122,18 +1124,119 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 360,
                     'height': 360,
                     'fps': 30,
-                }]
+                }],
+                {},
+            ), (
+                'subtitles',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+                [{
+                    'format_id': 'audio=128001',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'm4a',
+                    'tbr': 128.001,
+                    'asr': 48000,
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'vcodec': 'none',
+                    'acodec': 'mp4a.40.2',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=100000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 336,
+                    'height': 144,
+                    'tbr': 100,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=326000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 562,
+                    'height': 240,
+                    'tbr': 326,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=698000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 844,
+                    'height': 360,
+                    'tbr': 698,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=1493000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 1126,
+                    'height': 480,
+                    'tbr': 1493,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=4482000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 1688,
+                    'height': 720,
+                    'tbr': 4482,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }],
+                {
+                    'en': [
+                        {
+                            'ext': 'mp4',
+                            'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                            'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                            'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                            'protocol': 'http_dash_segments',
+                        }
+                    ]
+                },
             )
         ]
 
-        for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+        for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
             with open('./test/testdata/mpd/%s.mpd' % mpd_file,
                       mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_mpd_formats(
+                formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
+                expect_value(self, subtitles, expected_subtitles, None)
 
     def test_parse_f4m_formats(self):
         _TEST_CASES = [
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index d33557135..ed55d3e07 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import collections
 import datetime
 import functools
 import hashlib
@@ -58,6 +59,7 @@ from ..utils import (
     GeoRestrictedError,
     GeoUtils,
     int_or_none,
+    join_nonempty,
     js_to_json,
     JSON_LD_RE,
     mimetype2ext,
@@ -74,6 +76,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    T,
     traverse_obj,
     try_get,
     unescapeHTML,
@@ -1751,6 +1754,12 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }
 
+    def _report_ignoring_subs(self, name):
+        """Warn (once only) that subtitle tracks in the `name` manifest are ignored"""
+        msg = ('Ignoring subtitle tracks found in the {0} manifest; '
+               'if any subtitle tracks are missing,'.format(name))
+        self.report_warning(bug_reports_message(msg), only_once=True)
+
     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
@@ -2191,23 +2200,46 @@ class InfoExtractor(object):
             })
         return entries
 
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, *args, **kwargs):
+        formats, subtitles = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+        if subtitles:  # only the formats are returned here, so tell the user
+            self._report_ignoring_subs('DASH')
+        return formats
+
+    def _extract_mpd_formats_and_subtitles(
+            self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+            fatal=True, data=None, headers=None, query=None):
+        """Download the MPD at `mpd_url`; return (formats, subtitles), or ([], {}) if no document was obtained"""
+        # TODO: decide whether to keep this check: the param is not yet implemented
+        if self.get_param('ignore_no_formats_error'):
+            fatal = False
+
         res = self._download_xml_handle(
             mpd_url, video_id,
-            note=note or 'Downloading MPD manifest',
-            errnote=errnote or 'Failed to download MPD manifest',
-            fatal=fatal, data=data, headers=headers, query=query)
+            note='Downloading MPD manifest' if note is None else note,
+            errnote='Failed to download MPD manifest' if errnote is None else errnote,
+            fatal=fatal, data=data, headers=headers or {}, query=query or {})
         if res is False:
-            return []
+            return [], {}
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return []
-        mpd_base_url = base_url(urlh.geturl())
+            return [], {}
 
-        return self._parse_mpd_formats(
+        # We could have been redirected to a new url when we retrieved our mpd file.
+        mpd_url = urlh.geturl()
+        mpd_base_url = base_url(mpd_url)
+
+        return self._parse_mpd_formats_and_subtitles(
             mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats(self, *args, **kwargs):
+        formats, subtitles = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+        if subtitles:  # only the formats are returned here, so tell the user
+            self._report_ignoring_subs('DASH')
+        return formats
+
+    def _parse_mpd_formats_and_subtitles(
+            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2215,8 +2247,10 @@ class InfoExtractor(object):
             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
         """
-        if mpd_doc.get('type') == 'dynamic':
-            return []
+        # TODO: param not yet implemented: default like previous yt-dl logic
+        if not self.get_param('dynamic_mpd', False):
+            if mpd_doc.get('type') == 'dynamic':
+                return [], {}
 
         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
 
@@ -2228,6 +2262,7 @@ class InfoExtractor(object):
 
         def extract_multisegment_info(element, ms_parent_info):
             ms_info = ms_parent_info.copy()
+            base_url = ms_info.get('base_url')
 
             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
             # common attributes and elements.  We will only extract relevant
@@ -2285,7 +2320,8 @@ class InfoExtractor(object):
             return ms_info
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats = []
+        formats, subtitles = [], {}
+        stream_numbers = collections.defaultdict(int)
         for period in mpd_doc.findall(_add_ns('Period')):
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
@@ -2295,7 +2331,7 @@ class InfoExtractor(object):
             for adaptation_set in period.findall(_add_ns('AdaptationSet')):
                 if is_drm_protected(adaptation_set):
                     continue
-                adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+                adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
                 for representation in adaptation_set.findall(_add_ns('Representation')):
                     if is_drm_protected(representation):
                         continue
@@ -2303,27 +2339,35 @@ class InfoExtractor(object):
                     representation_attrib.update(representation.attrib)
                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                     mime_type = representation_attrib['mimeType']
-                    content_type = mime_type.split('/')[0]
-                    if content_type == 'text':
-                        # TODO implement WebVTT downloading
-                        pass
-                    elif content_type in ('video', 'audio'):
-                        base_url = ''
-                        for element in (representation, adaptation_set, period, mpd_doc):
-                            base_url_e = element.find(_add_ns('BaseURL'))
-                            if base_url_e is not None:
-                                base_url = base_url_e.text + base_url
-                                if re.match(r'^https?://', base_url):
-                                    break
-                        if mpd_base_url and not re.match(r'^https?://', base_url):
-                            if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
-                                mpd_base_url += '/'
-                            base_url = mpd_base_url + base_url
-                        representation_id = representation_attrib.get('id')
-                        lang = representation_attrib.get('lang')
-                        url_el = representation.find(_add_ns('BaseURL'))
-                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
-                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                    content_type = representation_attrib.get('contentType') or mime_type.split('/')[0]
+                    codec_str = representation_attrib.get('codecs', '')
+                    # Some kind of binary subtitle found in some youtube livestreams
+                    if mime_type == 'application/x-rawcc':
+                        codecs = {'scodec': codec_str}
+                    else:
+                        codecs = parse_codecs(codec_str)
+                    if content_type not in ('video', 'audio', 'text'):
+                        if mime_type == 'image/jpeg':
+                            content_type = mime_type
+                        elif codecs.get('vcodec', 'none') != 'none':
+                            content_type = 'video'
+                        elif codecs.get('acodec', 'none') != 'none':
+                            content_type = 'audio'
+                        elif codecs.get('scodec', 'none') != 'none':
+                            content_type = 'text'
+                        elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
+                            content_type = 'text'
+                        else:
+                            self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+                            continue
+
+                    representation_id = representation_attrib.get('id')
+                    lang = representation_attrib.get('lang')
+                    url_el = representation.find(_add_ns('BaseURL'))
+                    filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+                    bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                    format_id = join_nonempty(representation_id or content_type, mpd_id)
+                    if content_type in ('video', 'audio'):
                         f = {
                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                             'manifest_url': mpd_url,
@@ -2338,104 +2382,125 @@ class InfoExtractor(object):
                             'filesize': filesize,
                             'container': mimetype2ext(mime_type) + '_dash',
                         }
-                        f.update(parse_codecs(representation_attrib.get('codecs')))
-                        representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+                        f.update(codecs)
+                    elif content_type == 'text':
+                        f = {
+                            'ext': mimetype2ext(mime_type),
+                            'manifest_url': mpd_url,
+                            'filesize': filesize,
+                        }
+                    elif content_type == 'image/jpeg':
+                        # See test case in VikiIE
+                        # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+                        f = {
+                            'format_id': format_id,
+                            'ext': 'mhtml',
+                            'manifest_url': mpd_url,
+                            'format_note': 'DASH storyboards (jpeg)',
+                            'acodec': 'none',
+                            'vcodec': 'none',
+                        }
+                    if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+                        f['has_drm'] = True
+                    representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info)
 
-                        def prepare_template(template_name, identifiers):
-                            tmpl = representation_ms_info[template_name]
-                            # First of, % characters outside $...$ templates
-                            # must be escaped by doubling for proper processing
-                            # by % operator string formatting used further (see
-                            # https://github.com/ytdl-org/youtube-dl/issues/16867).
-                            t = ''
-                            in_template = False
-                            for c in tmpl:
+                    def prepare_template(template_name, identifiers):
+                        """Return representation_ms_info[template_name] rewritten for % formatting of `identifiers`"""
+                        tmpl = representation_ms_info[template_name]
+                        # First off, % characters outside $...$ templates
+                        # must be escaped by doubling for proper processing
+                        # by % operator string formatting used further (see
+                        # https://github.com/ytdl-org/youtube-dl/issues/16867).
+                        t, in_template = '', False
+                        for c in tmpl:
+                            t += c
+                            if c == '$':
+                                in_template = not in_template
+                            elif c == '%' and not in_template:
                                 t += c
-                                if c == '$':
-                                    in_template = not in_template
-                                elif c == '%' and not in_template:
-                                    t += c
-                            # Next, $...$ templates are translated to their
-                            # %(...) counterparts to be used with % operator
-                            t = t.replace('$RepresentationID$', representation_id)
-                            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
-                            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
-                            t.replace('$$', '$')
-                            return t
+                        # Next, $...$ templates are translated to their %(...)
+                        # counterparts for the % operator, and $$ is unescaped to $
+                        t = t.replace('$RepresentationID$', representation_id)
+                        t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+                        t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                        t = t.replace('$$', '$')
+                        return t
 
-                        # @initialization is a regular template like @media one
-                        # so it should be handled just the same way (see
-                        # https://github.com/ytdl-org/youtube-dl/issues/11605)
-                        if 'initialization' in representation_ms_info:
-                            initialization_template = prepare_template(
-                                'initialization',
-                                # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
-                                # $Time$ shall not be included for @initialization thus
-                                # only $Bandwidth$ remains
-                                ('Bandwidth', ))
-                            representation_ms_info['initialization_url'] = initialization_template % {
-                                'Bandwidth': bandwidth,
-                            }
+                    # @initialization is a regular template like @media one
+                    # so it should be handled just the same way (see
+                    # https://github.com/ytdl-org/youtube-dl/issues/11605)
+                    if 'initialization' in representation_ms_info:
+                        initialization_template = prepare_template(
+                            'initialization',
+                            # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+                            # $Time$ shall not be included for @initialization thus
+                            # only $Bandwidth$ remains
+                            ('Bandwidth', ))
+                        representation_ms_info['initialization_url'] = initialization_template % {
+                            'Bandwidth': bandwidth,
+                        }
 
-                        def location_key(location):
-                            return 'url' if re.match(r'^https?://', location) else 'path'
+                    def location_key(location):
+                        return 'url' if re.match(r'^https?://', location) else 'path'
 
-                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+                    if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
-                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
-                            media_location_key = location_key(media_template)
+                        media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                        media_location_key = location_key(media_template)
 
-                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
-                            # can't be used at the same time
-                            if '%(Number' in media_template and 's' not in representation_ms_info:
-                                segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
-                                    segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
-                                    representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
-                                representation_ms_info['fragments'] = [{
-                                    media_location_key: media_template % {
-                                        'Number': segment_number,
-                                        'Bandwidth': bandwidth,
-                                    },
-                                    'duration': segment_duration,
-                                } for segment_number in range(
-                                    representation_ms_info['start_number'],
-                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
-                            else:
-                                # $Number*$ or $Time$ in media template with S list available
-                                # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
-                                # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
-                                representation_ms_info['fragments'] = []
-                                segment_time = 0
-                                segment_d = None
-                                segment_number = representation_ms_info['start_number']
+                        # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+                        # can't be used at the same time
+                        if '%(Number' in media_template and 's' not in representation_ms_info:
+                            segment_duration = None
+                            if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+                                segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+                                representation_ms_info['total_number'] = int(math.ceil(
+                                    float_or_none(period_duration, segment_duration, default=0)))
+                            representation_ms_info['fragments'] = [{
+                                media_location_key: media_template % {
+                                    'Number': segment_number,
+                                    'Bandwidth': bandwidth,
+                                },
+                                'duration': segment_duration,
+                            } for segment_number in range(
+                                representation_ms_info['start_number'],
+                                representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                        else:
+                            # $Number*$ or $Time$ in media template with S list available
+                            # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+                            # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+                            representation_ms_info['fragments'] = []
+                            segment_time = 0
+                            segment_d = None
+                            segment_number = representation_ms_info['start_number']
 
-                                def add_segment_url():
-                                    segment_url = media_template % {
-                                        'Time': segment_time,
-                                        'Bandwidth': bandwidth,
-                                        'Number': segment_number,
-                                    }
-                                    representation_ms_info['fragments'].append({
-                                        media_location_key: segment_url,
-                                        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
-                                    })
+                            def add_segment_url():
+                                segment_url = media_template % {
+                                    'Time': segment_time,
+                                    'Bandwidth': bandwidth,
+                                    'Number': segment_number,
+                                }
+                                representation_ms_info['fragments'].append({
+                                    media_location_key: segment_url,
+                                    'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+                                })
 
-                                for num, s in enumerate(representation_ms_info['s']):
-                                    segment_time = s.get('t') or segment_time
-                                    segment_d = s['d']
+                            for num, s in enumerate(representation_ms_info['s']):
+                                segment_time = s.get('t') or segment_time
+                                segment_d = s['d']
+                                add_segment_url()
+                                segment_number += 1
+                                for r in range(s.get('r', 0)):
+                                    segment_time += segment_d
                                     add_segment_url()
                                     segment_number += 1
-                                    for r in range(s.get('r', 0)):
-                                        segment_time += segment_d
-                                        add_segment_url()
-                                        segment_number += 1
-                                    segment_time += segment_d
-                        elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+                                segment_time += segment_d
+                    elif 'segment_urls' in representation_ms_info:
+                        fragments = []
+                        if 's' in representation_ms_info:
                             # No media template
                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
                             # or any YouTube dashsegments video
-                            fragments = []
                             segment_index = 0
                             timescale = representation_ms_info['timescale']
                             for s in representation_ms_info['s']:
@@ -2487,8 +2552,15 @@ class InfoExtractor(object):
                             f['url'] = base_url
                         formats.append(f)
                     else:
-                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        return formats
+                        # Assuming direct URL to unfragmented media.
+                        f['url'] = representation_ms_info['base_url']
+                    if content_type in ('video', 'audio', 'image/jpeg'):
+                        f['manifest_stream_number'] = stream_numbers[f['url']]
+                        stream_numbers[f['url']] += 1
+                        formats.append(f)
+                    elif content_type == 'text':
+                        subtitles.setdefault(lang or 'und', []).append(f)
+        return formats, subtitles
 
     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(

From bec9180e8904a12c55cfa838b0541879d16bf20f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 00:07:14 +0000
Subject: [PATCH 151/156] [downloader/dash] Support `range` in fragment (format
 f'{start}-{end}')  * adapted from
 https://github.com/ytdl-org/youtube-dl/pull/30279  * thx former GH user
 kikuyan

---
 youtube_dl/downloader/dash.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 2800d4260..f3c058879 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -35,6 +35,7 @@ class DashSegmentsFD(FragmentFD):
         for frag_index, fragment in enumerate(fragments, 1):
             if frag_index <= ctx['fragment_index']:
                 continue
+            success = False
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = frag_index == 1 or not skip_unavailable_fragments
@@ -42,10 +43,14 @@ class DashSegmentsFD(FragmentFD):
             if not fragment_url:
                 assert fragment_base_url
                 fragment_url = urljoin(fragment_base_url, fragment['path'])
-            success = False
+            headers = info_dict.get('http_headers')
+            fragment_range = fragment.get('range')
+            if fragment_range:
+                headers = headers.copy() if headers else {}
+                headers['Range'] = 'bytes=%s' % (fragment_range,)
             for count in itertools.count():
                 try:
-                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers)
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)

From 4eaeb9b2c680ed097770ce976c3b37a1b05c0800 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:37:08 +0000
Subject: [PATCH 152/156] [InfoExtractor] Support byte range for DASH * adapted
 from https://github.com/ytdl-org/youtube-dl/pull/30279 * thx former GH user
 kikuyan

---
 test/test_InfoExtractor.py          |  66 ++++++
 test/testdata/mpd/range_only.mpd    |  35 +++
 test/testdata/mpd/subtitles.mpd     | 351 ++++++++++++++++++++++++++++
 test/testdata/mpd/url_and_range.mpd |  32 +++
 youtube_dl/extractor/common.py      | 114 ++++++---
 5 files changed, 562 insertions(+), 36 deletions(-)
 create mode 100644 test/testdata/mpd/range_only.mpd
 create mode 100644 test/testdata/mpd/subtitles.mpd
 create mode 100644 test/testdata/mpd/url_and_range.mpd

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 043b62243..d55d6ad54 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1126,6 +1126,72 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'fps': 30,
                 }],
                 {},
+            ), (
+                # https://github.com/ytdl-org/youtube-dl/issues/30235
+                # Bento4 generated test mpd
+                # mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
+                'url_and_range',
+                'http://unknown/manifest.mpd',  # mpd_url
+                'http://unknown/',  # mpd_base_url
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/',
+                    'ext': 'm4a',
+                    'format_id': 'audio-und-mp4a.40.2',
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 98.808,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/',
+                    'ext': 'mp4',
+                    'format_id': 'video-avc1',
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4D401E',
+                    'tbr': 699.597,
+                    'width': 768,
+                    'height': 432
+                }],
+                {},
+            ), (
+                # https://github.com/ytdl-org/youtube-dl/issues/27575
+                # GPAC generated test mpd
+                # MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
+                'range_only',
+                'http://unknown/manifest.mpd',  # mpd_url
+                'http://unknown/',  # mpd_base_url
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
+                    'ext': 'm4a',
+                    'format_id': '2',
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 98.096,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/video_dashinit.mp4',
+                    'ext': 'mp4',
+                    'format_id': '1',
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4D401E',
+                    'tbr': 526.987,
+                    'width': 768,
+                    'height': 432
+                }],
+                {},
             ), (
                 'subtitles',
                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
diff --git a/test/testdata/mpd/range_only.mpd b/test/testdata/mpd/range_only.mpd
new file mode 100644
index 000000000..e0c2152d1
--- /dev/null
+++ b/test/testdata/mpd/range_only.mpd
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<!-- MPD file Generated with GPAC version 1.0.1-revrelease at 2021-11-27T20:53:11.690Z -->
+<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H0M30.196S" maxSegmentDuration="PT0H0M10.027S" profiles="urn:mpeg:dash:profile:full:2011">
+ <ProgramInformation moreInformationURL="http://gpac.io">
+  <Title>manifest.mpd generated by GPAC</Title>
+ </ProgramInformation>
+
+ <Period duration="PT0H0M30.196S">
+  <AdaptationSet segmentAlignment="true" maxWidth="768" maxHeight="432" maxFrameRate="30000/1001" par="16:9" lang="und" startWithSAP="1">
+   <Representation id="1" mimeType="video/mp4" codecs="avc1.4D401E" width="768" height="432" frameRate="30000/1001" sar="1:1" bandwidth="526987">
+    <BaseURL>video_dashinit.mp4</BaseURL>
+    <SegmentList timescale="90000" duration="900000">
+     <Initialization range="0-881"/>
+     <SegmentURL mediaRange="882-876094" indexRange="882-925"/>
+     <SegmentURL mediaRange="876095-1466732" indexRange="876095-876138"/>
+     <SegmentURL mediaRange="1466733-1953615" indexRange="1466733-1466776"/>
+     <SegmentURL mediaRange="1953616-1994211" indexRange="1953616-1953659"/>
+    </SegmentList>
+   </Representation>
+  </AdaptationSet>
+  <AdaptationSet segmentAlignment="true" lang="und" startWithSAP="1">
+   <Representation id="2" mimeType="audio/mp4" codecs="mp4a.40.2" audioSamplingRate="48000" bandwidth="98096">
+    <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+    <BaseURL>audio_dashinit.mp4</BaseURL>
+    <SegmentList timescale="48000" duration="480000">
+     <Initialization range="0-752"/>
+     <SegmentURL mediaRange="753-124129" indexRange="753-796"/>
+     <SegmentURL mediaRange="124130-250544" indexRange="124130-124173"/>
+     <SegmentURL mediaRange="250545-374929" indexRange="250545-250588"/>
+    </SegmentList>
+   </Representation>
+  </AdaptationSet>
+ </Period>
+</MPD>
+
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 000000000..6f948adba
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<MPD
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns="urn:mpeg:dash:schema:mpd:2011"
+  xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
+  type="static"
+  mediaPresentationDuration="PT14M48S"
+  maxSegmentDuration="PT1M"
+  minBufferTime="PT10S"
+  profiles="urn:mpeg:dash:profile:isoff-live:2011">
+  <Period
+    id="1"
+    duration="PT14M48S">
+    <BaseURL>dash/</BaseURL>
+    <AdaptationSet
+      id="1"
+      group="1"
+      contentType="audio"
+      segmentAlignment="true"
+      audioSamplingRate="48000"
+      mimeType="audio/mp4"
+      codecs="mp4a.40.2"
+      startWithSAP="1">
+      <AudioChannelConfiguration
+        schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
+        value="2" />
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="48000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="3584" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="audio=128001"
+        bandwidth="128001">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="2"
+      group="3"
+      contentType="text"
+      lang="en"
+      mimeType="application/mp4"
+      codecs="stpp"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
+      <SegmentTemplate
+        timescale="1000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="60000" r="9" />
+          <S d="24000" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="textstream_eng=1000"
+        bandwidth="1000">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="3"
+      group="2"
+      contentType="video"
+      par="960:409"
+      minBandwidth="100000"
+      maxBandwidth="4482000"
+      maxWidth="1689"
+      maxHeight="720"
+      segmentAlignment="true"
+      mimeType="video/mp4"
+      codecs="avc1.4D401F"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="12288"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="24576" r="443" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="video=100000"
+        bandwidth="100000"
+        width="336"
+        height="144"
+        sar="2880:2863"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=326000"
+        bandwidth="326000"
+        width="562"
+        height="240"
+        sar="115200:114929"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=698000"
+        bandwidth="698000"
+        width="844"
+        height="360"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=1493000"
+        bandwidth="1493000"
+        width="1126"
+        height="480"
+        sar="230400:230267"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=4482000"
+        bandwidth="4482000"
+        width="1688"
+        height="720"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
diff --git a/test/testdata/mpd/url_and_range.mpd b/test/testdata/mpd/url_and_range.mpd
new file mode 100644
index 000000000..b8c68aad2
--- /dev/null
+++ b/test/testdata/mpd/url_and_range.mpd
@@ -0,0 +1,32 @@
+<?xml version="1.0" ?>
+<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT10.01S" mediaPresentationDuration="PT30.097S" type="static">
+  <!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-639 -->
+  <Period>
+    <!-- Video -->
+    <AdaptationSet mimeType="video/mp4" segmentAlignment="true" startWithSAP="1" maxWidth="768" maxHeight="432">
+      <Representation id="video-avc1" codecs="avc1.4D401E" width="768" height="432" scanType="progressive" frameRate="30000/1001" bandwidth="699597">
+        <SegmentList timescale="1000" duration="10010">
+          <Initialization sourceURL="video-frag.mp4" range="36-746"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="747-876117"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="876118-1466913"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="1466914-1953954"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="1953955-1994652"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+    <!-- Audio -->
+    <AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true">
+      <Representation id="audio-und-mp4a.40.2" codecs="mp4a.40.2" bandwidth="98808" audioSamplingRate="48000">
+        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
+        <SegmentList timescale="1000" duration="10010">
+          <Initialization sourceURL="audio-frag.mp4" range="32-623"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="624-124199"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="124200-250303"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="250304-374365"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="374366-374836"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
+
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ed55d3e07..76414554a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -183,6 +183,8 @@ class InfoExtractor(object):
                                             fragment_base_url
                                  * "duration" (optional, int or float)
                                  * "filesize" (optional, int)
+                                 * "range" (optional, str of the form "start-end"
+                                            to use in HTTP Range header)
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -2296,15 +2298,27 @@ class InfoExtractor(object):
             def extract_Initialization(source):
                 initialization = source.find(_add_ns('Initialization'))
                 if initialization is not None:
-                    ms_info['initialization_url'] = initialization.attrib['sourceURL']
+                    ms_info['initialization_url'] = initialization.get('sourceURL') or base_url
+                    initialization_url_range = initialization.get('range')
+                    if initialization_url_range:
+                        ms_info['initialization_url_range'] = initialization_url_range
 
             segment_list = element.find(_add_ns('SegmentList'))
             if segment_list is not None:
                 extract_common(segment_list)
                 extract_Initialization(segment_list)
                 segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
-                if segment_urls_e:
-                    ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+                segment_urls = traverse_obj(segment_urls_e, (
+                    Ellipsis, T(lambda e: e.attrib), 'media'))
+                if segment_urls:
+                    ms_info['segment_urls'] = segment_urls
+                segment_urls_range = traverse_obj(segment_urls_e, (
+                    Ellipsis, T(lambda e: e.attrib), 'mediaRange',
+                    T(lambda r: re.findall(r'^\d+-\d+$', r)), 0))
+                if segment_urls_range:
+                    ms_info['segment_urls_range'] = segment_urls_range
+                    if not segment_urls:
+                        ms_info['segment_urls'] = [base_url for _ in segment_urls_range]
             else:
                 segment_template = element.find(_add_ns('SegmentTemplate'))
                 if segment_template is not None:
@@ -2443,6 +2457,11 @@ class InfoExtractor(object):
                     def location_key(location):
                         return 'url' if re.match(r'^https?://', location) else 'path'
 
+                    def calc_segment_duration():
+                        return float_or_none(
+                            representation_ms_info['segment_duration'],
+                            representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+
                     if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
                         media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
@@ -2512,45 +2531,68 @@ class InfoExtractor(object):
                                         'duration': duration,
                                     })
                                     segment_index += 1
-                            representation_ms_info['fragments'] = fragments
-                        elif 'segment_urls' in representation_ms_info:
+                        elif 'segment_urls_range' in representation_ms_info:
+                            # Segment URLs with mediaRange
+                            # Example: https://kinescope.io/200615537/master.mpd
+                            # https://github.com/ytdl-org/youtube-dl/issues/30235
+                            # or any mpd generated with Bento4 `mp4dash --no-split --use-segment-list`
+                            segment_duration = calc_segment_duration()
+                            for segment_url, segment_url_range in zip(
+                                    representation_ms_info['segment_urls'], representation_ms_info['segment_urls_range']):
+                                fragments.append({
+                                    location_key(segment_url): segment_url,
+                                    'range': segment_url_range,
+                                    'duration': segment_duration,
+                                })
+                        else:
                             # Segment URLs with no SegmentTimeline
                             # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
                             # https://github.com/ytdl-org/youtube-dl/pull/14844
-                            fragments = []
-                            segment_duration = float_or_none(
-                                representation_ms_info['segment_duration'],
-                                representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+                            segment_duration = calc_segment_duration()
                             for segment_url in representation_ms_info['segment_urls']:
-                                fragment = {
+                                fragments.append({
                                     location_key(segment_url): segment_url,
-                                }
-                                if segment_duration:
-                                    fragment['duration'] = segment_duration
-                                fragments.append(fragment)
-                            representation_ms_info['fragments'] = fragments
-                        # If there is a fragments key available then we correctly recognized fragmented media.
-                        # Otherwise we will assume unfragmented media with direct access. Technically, such
-                        # assumption is not necessarily correct since we may simply have no support for
-                        # some forms of fragmented media renditions yet, but for now we'll use this fallback.
-                        if 'fragments' in representation_ms_info:
-                            f.update({
-                                # NB: mpd_url may be empty when MPD manifest is parsed from a string
-                                'url': mpd_url or base_url,
-                                'fragment_base_url': base_url,
-                                'fragments': [],
-                                'protocol': 'http_dash_segments',
+                                    'duration': segment_duration,
+                                })
+                        representation_ms_info['fragments'] = fragments
+
+                    # If there is a fragments key available then we correctly recognized fragmented media.
+                    # Otherwise we will assume unfragmented media with direct access. Technically, such
+                    # assumption is not necessarily correct since we may simply have no support for
+                    # some forms of fragmented media renditions yet, but for now we'll use this fallback.
+                    if 'fragments' in representation_ms_info:
+                        base_url = representation_ms_info['base_url'] 
+                        f.update({
+                            # NB: mpd_url may be empty when MPD manifest is parsed from a string
+                            'url': mpd_url or base_url,
+                            'fragment_base_url': base_url,
+                            'fragments': [],
+                            'protocol': 'http_dash_segments',
+                        })
+                        if 'initialization_url' in representation_ms_info and 'initialization_url_range' in representation_ms_info:
+                            # Initialization URL with range (accompanied by Segment URLs with mediaRange above)
+                            # https://github.com/ytdl-org/youtube-dl/issues/30235
+                            initialization_url = representation_ms_info['initialization_url']
+                            f['fragments'].append({
+                                location_key(initialization_url): initialization_url,
+                                'range': representation_ms_info['initialization_url_range'],
                             })
-                            if 'initialization_url' in representation_ms_info:
-                                initialization_url = representation_ms_info['initialization_url']
-                                if not f.get('url'):
-                                    f['url'] = initialization_url
-                                f['fragments'].append({location_key(initialization_url): initialization_url})
-                            f['fragments'].extend(representation_ms_info['fragments'])
-                        else:
-                            # Assuming direct URL to unfragmented media.
-                            f['url'] = base_url
-                        formats.append(f)
+                        elif 'initialization_url' in representation_ms_info:
+                            initialization_url = representation_ms_info['initialization_url']
+                            if not f.get('url'):
+                                f['url'] = initialization_url
+                            f['fragments'].append({location_key(initialization_url): initialization_url})
+                        elif 'initialization_url_range' in representation_ms_info:
+                            # no Initialization URL but range (accompanied by no Segment URLs but mediaRange above)
+                            # https://github.com/ytdl-org/youtube-dl/issues/27575
+                            f['fragments'].append({
+                                location_key(base_url): base_url,
+                                'range': representation_ms_info['initialization_url_range'],
+                            })
+                        f['fragments'].extend(representation_ms_info['fragments'])
+                        if not period_duration:
+                            period_duration = sum(traverse_obj(representation_ms_info, (
+                                'fragments', Ellipsis, 'duration', T(float_or_none))))
                     else:
                         # Assuming direct URL to unfragmented media.
                         f['url'] = representation_ms_info['base_url']

From 1fd8f802b836cc1e8bf87b2dbe02decd6a980cb6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:45:43 +0000
Subject: [PATCH 153/156] [InfoExtractor] Correctly resolve BaseURL in DASH
 manifest Specs: * ISO/IEC 23009-1:2012 section 5.6 * RFC 3986 section 5.

---
 youtube_dl/extractor/common.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 76414554a..0e5dfd8fa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2262,9 +2262,24 @@ class InfoExtractor(object):
         def is_drm_protected(element):
             return element.find(_add_ns('ContentProtection')) is not None
 
+        from ..utils import YoutubeDLHandler
+        fix_path = YoutubeDLHandler._fix_path
+
+        def resolve_base_url(element, parent_base_url=None):
+            # TODO: use native XML traversal when ready
+            b_url = traverse_obj(element, (
+                T(lambda e: e.find(_add_ns('BaseURL')).text)))
+            if parent_base_url and b_url:
+                if not parent_base_url[-1] in ('/', ':'):
+                    parent_base_url += '/'
+                b_url = compat_urlparse.urljoin(parent_base_url, b_url)
+            if b_url:
+                b_url = fix_path(b_url)
+            return b_url or parent_base_url
+
         def extract_multisegment_info(element, ms_parent_info):
             ms_info = ms_parent_info.copy()
-            base_url = ms_info.get('base_url')
+            base_url = ms_info['base_url'] = resolve_base_url(element, ms_info.get('base_url'))
 
             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
             # common attributes and elements.  We will only extract relevant
@@ -2336,11 +2351,13 @@ class InfoExtractor(object):
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
         formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
+        mpd_base_url = resolve_base_url(mpd_doc, mpd_base_url or mpd_url)
         for period in mpd_doc.findall(_add_ns('Period')):
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
                 'timescale': 1,
+                'base_url': mpd_base_url,
             })
             for adaptation_set in period.findall(_add_ns('AdaptationSet')):
                 if is_drm_protected(adaptation_set):
@@ -2561,7 +2578,7 @@ class InfoExtractor(object):
                     # assumption is not necessarily correct since we may simply have no support for
                     # some forms of fragmented media renditions yet, but for now we'll use this fallback.
                     if 'fragments' in representation_ms_info:
-                        base_url = representation_ms_info['base_url'] 
+                        base_url = representation_ms_info['base_url']
                         f.update({
                             # NB: mpd_url may be empty when MPD manifest is parsed from a string
                             'url': mpd_url or base_url,

From bdda6b81df61f52eed2502c8ae624d297d918488 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:57:25 +0000
Subject: [PATCH 154/156] [Vbox7IE] Improve extraction * DASH extraction no
 longer fails with new range support * but always find combined formats if
 available * suppress ineffective XFF geo-bypass (causes time-outs) * adapted
 from https://github.com/ytdl-org/youtube-dl/pull/29680 * thx former GH user
 kikuyan

---
 youtube_dl/extractor/vbox7.py | 121 ++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 35 deletions(-)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index 8152acefd..c504c5311 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -2,9 +2,19 @@
 from __future__ import unicode_literals
 
 import re
+import time
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    float_or_none,
+    merge_dicts,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
 
 
 class Vbox7IE(InfoExtractor):
@@ -20,10 +30,12 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
     _GEO_COUNTRIES = ['BG']
+    _GEO_BYPASS = False
     _TESTS = [{
-        'url': 'http://vbox7.com/play:0946fff23c',
-        'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
+        'url': 'https://vbox7.com/play:0946fff23c',
+        'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
             'id': '0946fff23c',
             'ext': 'mp4',
@@ -34,18 +46,21 @@ class Vbox7IE(InfoExtractor):
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
         },
-        'params': {
-            'proxy': '127.0.0.1:8118',
-        },
+        'expected_warnings': [
+            'Unable to download webpage',
+        ],
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': '99f65c0c9ef9b682b97313e052734c3f',
+        'md5': 'aaf19465e37ec0b30b918df83ec32c50',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
             'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'description': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'timestamp': 1360215023,
+            'upload_date': '20130207',
+            'uploader': 'svideteliat_ot_varshava',
         },
-        'skip': 'georestricted',
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -54,52 +69,88 @@ class Vbox7IE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
-            webpage)
+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(cls._EMBED_REGEX[0], webpage)
         if mobj:
             return mobj.group('url')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        url = 'https://vbox7.com/play:%s' % (video_id,)
 
+        now = time.time()
         response = self._download_json(
-            'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
-            video_id)
+            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,),
+            video_id, headers={'Referer': url})
+        # estimate time to which possible `ago` member is relative
+        now = now + 0.5 * (time.time() - now)
 
         if 'error' in response:
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
 
-        video = response['options']
+        video_url = traverse_obj(response, ('options', 'src', T(url_or_none)))
 
-        title = video['title']
-        video_url = video['src']
-
-        if '/na.mp4' in video_url:
+        if '/na.mp4' in video_url or '':
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
-        uploader = video.get('uploader')
+        ext = determine_ext(video_url)
+        if ext == 'mpd':
+            # In case MPD cannot be parsed, or anyway, get mp4 combined
+            # formats usually provided to Safari, iOS, and old Windows
+            try:
+                formats, subtitles = self._extract_mpd_formats_and_subtitles(
+                    video_url, video_id, 'dash', fatal=False)
+            except KeyError:
+                self.report_warning('Failed to parse MPD manifest')
+                formats, subtitles = [], {}
 
-        webpage = self._download_webpage(
-            'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
+            video = response['options']
+            resolutions = (1080, 720, 480, 240, 144)
+            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
+            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
+                if res > highest_res:
+                    continue
+                formats.append({
+                    'url': video_url.replace('.mpd', '_%d.mp4' % res),
+                    'format_id': '%dp' % res,
+                    'height': res,
+                })
+            # if above formats are flaky, enable the line below
+            # self._check_formats(formats, video_id)
+        else:
+            formats = [{
+                'url': video_url,
+            }]
+            subtitles = {}
+        self._sort_formats(formats)
 
-        info = {}
+        webpage = self._download_webpage(url, video_id, fatal=False) or ''
 
-        if webpage:
-            info = self._search_json_ld(
-                webpage.replace('"/*@context"', '"@context"'), video_id,
-                fatal=False)
+        info = self._search_json_ld(
+            webpage.replace('"/*@context"', '"@context"'), video_id,
+            fatal=False) if webpage else {}
 
-        info.update({
+        if not info.get('title'):
+            info['title'] = traverse_obj(response, (
+                'options', 'title', T(txt_or_none))) or self._og_search_title(webpage)
+
+        def if_missing(k):
+            return lambda x: None if k in info else x
+
+        info = merge_dicts(info, {
             'id': video_id,
-            'title': title,
-            'url': video_url,
-            'uploader': uploader,
-            'thumbnail': self._proto_relative_url(
+            'formats': formats,
+            'subtitles': subtitles or None,
+        }, info, traverse_obj(response, ('options', {
+            'uploader': ('uploader', T(txt_or_none)),
+            'timestamp': ('ago', T(if_missing('timestamp')), T(lambda t: int(round((now - t) / 60.0)) * 60)),
+            'duration': ('duration', T(if_missing('duration')), T(float_or_none)),
+        })))
+        if 'thumbnail' not in info:
+            info['thumbnail'] = self._proto_relative_url(
                 info.get('thumbnail') or self._og_search_thumbnail(webpage),
-                'http:'),
-        })
+                'https:')
+
         return info

From 4416f82c809a81737d68875dcb201e366d58dabd Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 18:17:09 +0000
Subject: [PATCH 155/156] [Vbox7IE] Sanitise ld+json containing unexpected
 characters * based on PR #29680 * added hack to force invoking
 `transform_source` * fixes #26218

---
 youtube_dl/extractor/vbox7.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index c504c5311..d114ecb07 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -5,6 +5,7 @@ import re
 import time
 
 from .common import InfoExtractor
+from ..compat import compat_kwargs
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -75,6 +76,27 @@ class Vbox7IE(InfoExtractor):
         if mobj:
             return mobj.group('url')
 
+    # transform_source=None, fatal=True
+    def _parse_json(self, json_string, video_id, *args, **kwargs):
+        if '"@context"' in json_string[:30]:
+            # this is ld+json, or that's the way to bet
+            transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
+            if not transform_source:
+
+                def fix_chars(src):
+                    # fix malformed ld+json: escape raw (CR)LFs inside quoted strings
+                    return re.sub(
+                        r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
+
+                if len(args) > 0:
+                    args = (fix_chars,) + args[1:]
+                else:
+                    kwargs['transform_source'] = fix_chars
+                    kwargs = compat_kwargs(kwargs)
+
+        return super(Vbox7IE, self)._parse_json(
+            json_string, video_id, *args, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         url = 'https://vbox7.com/play:%s' % (video_id,)

From 7687389f08a5c7c49e57d1b7f7b11b1c87b47b68 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 18 Feb 2024 20:55:01 +0000
Subject: [PATCH 156/156] [Vbox7] Improve extraction, adding features from
 yt-dlp PR #9100

* changes from https://github.com/yt-dlp/yt-dlp/pull/9100 (thx
seproDev):
  - attempt HLS extraction
  - re-enable XFF
  - test `view_count`, `duration` extraction
* improve commenting, error checks
---
 youtube_dl/extractor/vbox7.py | 80 +++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index d114ecb07..1c0b77074 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -7,6 +7,7 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_kwargs
 from ..utils import (
+    base_url,
     determine_ext,
     ExtractorError,
     float_or_none,
@@ -14,6 +15,7 @@ from ..utils import (
     T,
     traverse_obj,
     txt_or_none,
+    url_basename,
     url_or_none,
 )
 
@@ -33,8 +35,8 @@ class Vbox7IE(InfoExtractor):
                     '''
     _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
     _GEO_COUNTRIES = ['BG']
-    _GEO_BYPASS = False
     _TESTS = [{
+        # the http: URL just redirects here
         'url': 'https://vbox7.com/play:0946fff23c',
         'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
@@ -42,17 +44,19 @@ class Vbox7IE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Борисов: Притеснен съм за бъдещето на България',
             'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
-            'thumbnail': r're:^https?://.*\.jpg$',
             'timestamp': 1470982814,
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'duration': 2640,
         },
         'expected_warnings': [
             'Unable to download webpage',
         ],
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': 'aaf19465e37ec0b30b918df83ec32c50',
+        'md5': '99f65c0c9ef9b682b97313e052734c3f',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
@@ -61,7 +65,11 @@ class Vbox7IE(InfoExtractor):
             'timestamp': 1360215023,
             'upload_date': '20130207',
             'uploader': 'svideteliat_ot_varshava',
+            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
+            'view_count': int,
+            'duration': 83,
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -76,6 +84,9 @@ class Vbox7IE(InfoExtractor):
         if mobj:
             return mobj.group('url')
 
+    # specialisation to transform what looks like ld+json that
+    # may contain invalid character combinations
+
     # transform_source=None, fatal=True
     def _parse_json(self, json_string, video_id, *args, **kwargs):
         if '"@context"' in json_string[:30]:
@@ -103,49 +114,64 @@ class Vbox7IE(InfoExtractor):
 
         now = time.time()
         response = self._download_json(
-            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,),
-            video_id, headers={'Referer': url})
+            'https://www.vbox7.com/aj/player/item/options', video_id,
+            query={'vid': video_id}, headers={'Referer': url})
         # estimate time to which possible `ago` member is relative
         now = now + 0.5 * (time.time() - now)
 
-        if 'error' in response:
+        if traverse_obj(response, 'error'):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
 
-        video_url = traverse_obj(response, ('options', 'src', T(url_or_none)))
+        src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or ''
 
-        if '/na.mp4' in video_url or '':
+        fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
+        if fmt_base in ('na', 'vn'):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
-        ext = determine_ext(video_url)
+        ext = determine_ext(src_url)
         if ext == 'mpd':
-            # In case MPD cannot be parsed, or anyway, get mp4 combined
-            # formats usually provided to Safari, iOS, and old Windows
+            # extract MPD
             try:
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(
-                    video_url, video_id, 'dash', fatal=False)
-            except KeyError:
+                    src_url, video_id, 'dash', fatal=False)
+            except KeyError:  # fatal doesn't catch this
                 self.report_warning('Failed to parse MPD manifest')
                 formats, subtitles = [], {}
+        else:
+            # a direct media URL; an m3u8 `src` is extracted as HLS below, so
+            # start from an empty list rather than leaving `formats` unbound
+            subtitles = {}
+            formats = [{'url': src_url}] if src_url and ext != 'm3u8' else []
 
+        if src_url:
+            # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
+            fmt_base = base_url(src_url) + fmt_base
+            # prepare for _extract_m3u8_formats_and_subtitles()
+            # hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+            hls_formats = self._extract_m3u8_formats(
+                '{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False)
+            formats.extend(hls_formats)
+            # self._merge_subtitles(hls_subs, target=subtitles)
+
+            # In case MPD/HLS cannot be parsed, or anyway, get mp4 combined
+            # formats usually provided to Safari, iOS, and old Windows
             video = response['options']
             resolutions = (1080, 720, 480, 240, 144)
-            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
-            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
-                if res > highest_res:
-                    continue
-                formats.append({
-                    'url': video_url.replace('.mpd', '_%d.mp4' % res),
-                    'format_id': '%dp' % res,
+            highest_res = traverse_obj(video, (
+                'highestRes', T(int))) or resolutions[0]
+            resolutions = traverse_obj(video, (
+                'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions
+            mp4_formats = traverse_obj(resolutions, (
+                Ellipsis, T(lambda res: {
+                    'url': '{0}_{1}.mp4'.format(fmt_base, res),
+                    'format_id': 'http-{0}'.format(res),
                     'height': res,
-                })
+                })))
             # if above formats are flaky, enable the line below
-            # self._check_formats(formats, video_id)
-        else:
-            formats = [{
-                'url': video_url,
-            }]
-            subtitles = {}
+            # self._check_formats(mp4_formats, video_id)
+            formats.extend(mp4_formats)
+
         self._sort_formats(formats)
 
         webpage = self._download_webpage(url, video_id, fatal=False) or ''