From b5242da7d24028f60cd23fd10f28fb635c7c7634 Mon Sep 17 00:00:00 2001
From: lanegramling <lanegramling@gmail.com>
Date: Thu, 16 Dec 2021 11:42:17 -0700
Subject: [PATCH 001/312] [youtube] Update signature function patterns (closes
 #30363) (#30366)

---
 youtube_dl/extractor/youtube.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index dc4bd4a77..62e58c13e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1323,10 +1323,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         funcname = self._search_regex(
             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
-             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
+             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

From e41882335066ed03b1f4837e72fc0e83dfbe3525 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 17 Dec 2021 01:43:16 +0700
Subject: [PATCH 002/312] [ChangeLog] Actualize [ci skip]

---
 ChangeLog | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 680fffdf8..e530e6aea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+version <unreleased>
+
+Core
+* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
+
+Extractors
+* [youtube] Update signature function patterns (#30363, #30366)
+* [peertube] Only call description endpoint if necessary (#29383)
+* [periscope] Pass referer to HLS requests (#29419)
+- [liveleak] Remove extractor (#17625, #24222, #29331)
++ [pornhub] Add support for pornhubthbh7ap3u.onion
+* [pornhub] Detect geo restriction
+* [pornhub] Dismiss tbr extracted from download URLs (#28927)
+* [curiositystream:collection] Extend _VALID_URL (#26326, #29117)
+* [youtube] Make get_video_info processing more robust (#29333)
+* [youtube] Workaround for get_video_info request (#29333)
+* [bilibili] Strip uploader name (#29202)
+* [youtube] Update invidious instance list (#29281)
+* [umg:de] Update GraphQL API URL (#29304)
+* [nrk] Switch psapi URL to https (#29344)
++ [egghead] Add support for app.egghead.io (#28404, #29303)
+* [appleconnect] Fix extraction (#29208)
++ [orf:tvthek] Add support for MPD formats (#28672, #29236)
+
+
 version 2021.06.06
 
 Extractors

From 5014bd67c22b421207b2650d4dc874b95b36dda1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 17 Dec 2021 01:49:07 +0700
Subject: [PATCH 003/312] release 2021.12.17

---
 .github/ISSUE_TEMPLATE/1_broken_site.md          | 6 +++---
 .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++--
 .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++--
 .github/ISSUE_TEMPLATE/4_bug_report.md           | 6 +++---
 .github/ISSUE_TEMPLATE/5_feature_request.md      | 4 ++--
 ChangeLog                                        | 2 +-
 docs/supportedsites.md                           | 2 --
 youtube_dl/version.py                            | 2 +-
 8 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
index 4eb505231..e5405c235 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.md
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -18,7 +18,7 @@ title: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.06.06
+ [debug] youtube-dl version 2021.12.17
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
index 9fed0b489..33b01ce7f 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.md
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -19,7 +19,7 @@ labels: 'site-support-request'
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
 - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that none of provided URLs violate any copyrights
 - [ ] I've searched the bugtracker for similar site support requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
index 573e8ded0..285610cc7 100644
--- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -18,13 +18,13 @@ title: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 -->
 
 - [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've searched the bugtracker for similar site feature requests including closed ones
 
 
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
index c0031bf7a..af73525fb 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.md
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -18,7 +18,7 @@ title: ''
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 
 - [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.06.06
+ [debug] youtube-dl version 2021.12.17
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
index 1138ab2ca..42c878b83 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -19,13 +19,13 @@ labels: 'request'
 
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 -->
 
 - [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've searched the bugtracker for similar feature requests including closed ones
 
 
diff --git a/ChangeLog b/ChangeLog
index e530e6aea..658864282 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-version <unreleased>
+version 2021.12.17
 
 Core
 * [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index ed0d5e9d9..ae2a6b8b0 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -472,8 +472,6 @@
  - **LinuxAcademy**
  - **LiTV**
  - **LiveJournal**
- - **LiveLeak**
- - **LiveLeakEmbed**
  - **livestream**
  - **livestream:original**
  - **LnkGo**
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 461dd87ca..b82fbc702 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2021.06.06'
+__version__ = '2021.12.17'

From ed99d68bdddfba0440dc81c105d5c0ea7cee7d1c Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 30 Jan 2022 00:41:47 +0530
Subject: [PATCH 004/312] Add back `YoutubeSearchURLIE`

---
 test/test_all_urls.py              |   6 +-
 youtube_dl/extractor/extractors.py |   2 +-
 youtube_dl/extractor/youtube.py    | 177 +++++++++++++++--------------
 3 files changed, 93 insertions(+), 92 deletions(-)

diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index df6d81b5d..0e1328ede 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
 
-    # def test_youtube_search_matching(self):
-    #     self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
-    #     self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+    def test_youtube_search_matching(self):
+        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9b449937d..d403a2dbe 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1556,7 +1556,7 @@ from .youtube import (
     YoutubeRecommendedIE,
     YoutubeSearchDateIE,
     YoutubeSearchIE,
-    #YoutubeSearchURLIE,
+    YoutubeSearchURLIE,
     YoutubeSubscriptionsIE,
     YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 87bdc1677..578cfcf90 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -308,6 +308,77 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                 default='{}'), video_id, fatal=False)
 
+    def _search_results(self, query, params):
+        data = {
+            'context': {
+                'client': {
+                    'clientName': 'WEB',
+                    'clientVersion': '2.20201021.03.00',
+                }
+            },
+            'query': query,
+        }
+        if params:
+            data['params'] = params
+        for page_num in itertools.count(1):
+            search = self._download_json(
+                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+                video_id='query "%s"' % query,
+                note='Downloading page %s' % page_num,
+                errnote='Unable to download API page', fatal=False,
+                data=json.dumps(data).encode('utf8'),
+                headers={'content-type': 'application/json'})
+            if not search:
+                break
+            slr_contents = try_get(
+                search,
+                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
+                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
+                list)
+            if not slr_contents:
+                break
+            isr_contents = try_get(
+                slr_contents,
+                lambda x: x[0]['itemSectionRenderer']['contents'],
+                list)
+            if not isr_contents:
+                break
+            for content in isr_contents:
+                if not isinstance(content, dict):
+                    continue
+                video = content.get('videoRenderer')
+                if not isinstance(video, dict):
+                    continue
+                video_id = video.get('videoId')
+                if not video_id:
+                    continue
+                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
+                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
+                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
+                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+                view_count = int_or_none(self._search_regex(
+                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
+                    'view count', default=None))
+                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+                yield {
+                    '_type': 'url_transparent',
+                    'ie_key': YoutubeIE.ie_key(),
+                    'id': video_id,
+                    'url': video_id,
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'view_count': view_count,
+                    'uploader': uploader,
+                }
+            token = try_get(
+                slr_contents,
+                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+                compat_str)
+            if not token:
+                break
+            data['continuation'] = token
+
 
 class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
@@ -2454,7 +2525,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                         (?:
                             (?:channel|c|user|feed)/|
                             (?:playlist|watch)\?.*?\blist=|
-                            (?!(?:watch|embed|v|e)\b)
+                            (?!(?:watch|embed|v|e|results)\b)
                         )
                         (?P<id>[^/?\#&]+)
                     '''
@@ -3379,88 +3450,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 
 class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com searches'
-    # there doesn't appear to be a real limit, for example if you search for
-    # 'python' you get more than 8.000.000 results
-    _MAX_RESULTS = float('inf')
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
-    _SEARCH_PARAMS = None
+    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
     _TESTS = []
 
     def _entries(self, query, n):
-        data = {
-            'context': {
-                'client': {
-                    'clientName': 'WEB',
-                    'clientVersion': '2.20201021.03.00',
-                }
-            },
-            'query': query,
-        }
-        if self._SEARCH_PARAMS:
-            data['params'] = self._SEARCH_PARAMS
         total = 0
-        for page_num in itertools.count(1):
-            search = self._download_json(
-                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
-                video_id='query "%s"' % query,
-                note='Downloading page %s' % page_num,
-                errnote='Unable to download API page', fatal=False,
-                data=json.dumps(data).encode('utf8'),
-                headers={'content-type': 'application/json'})
-            if not search:
-                break
-            slr_contents = try_get(
-                search,
-                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
-                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
-                list)
-            if not slr_contents:
-                break
-            isr_contents = try_get(
-                slr_contents,
-                lambda x: x[0]['itemSectionRenderer']['contents'],
-                list)
-            if not isr_contents:
-                break
-            for content in isr_contents:
-                if not isinstance(content, dict):
-                    continue
-                video = content.get('videoRenderer')
-                if not isinstance(video, dict):
-                    continue
-                video_id = video.get('videoId')
-                if not video_id:
-                    continue
-                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
-                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
-                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
-                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
-                view_count = int_or_none(self._search_regex(
-                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
-                    'view count', default=None))
-                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
-                total += 1
-                yield {
-                    '_type': 'url_transparent',
-                    'ie_key': YoutubeIE.ie_key(),
-                    'id': video_id,
-                    'url': video_id,
-                    'title': title,
-                    'description': description,
-                    'duration': duration,
-                    'view_count': view_count,
-                    'uploader': uploader,
-                }
-                if total == n:
-                    return
-            token = try_get(
-                slr_contents,
-                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
-                compat_str)
-            if not token:
-                break
-            data['continuation'] = token
+        for entry in self._search_results(query, self._SEARCH_PARAMS):
+            yield entry
+            total += 1
+            if total >= n:
+                return
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
@@ -3471,18 +3472,19 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube.com searches, newest videos first'
-    _SEARCH_PARAMS = 'CAI%3D'
+    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
+    _TESTS = []
 
 
-r"""
-class YoutubeSearchURLIE(YoutubeSearchIE):
-    IE_DESC = 'YouTube.com search URLs'
-    IE_NAME = 'youtube:search_url'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
+    IE_DESC = 'YouTube search URLs with sorting and filter support'
+    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
         'info_dict': {
+            'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
         }
     }, {
@@ -3491,11 +3493,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
-        webpage = self._download_webpage(url, query)
-        return self.playlist_result(self._process_page(webpage), playlist_title=query)
-"""
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        query = (qs.get('search_query') or qs.get('q'))[0]
+        params = qs.get('sp', ('',))[0]
+        return self.playlist_result(self._search_results(query, params), query, query)
 
 
 class YoutubeFeedsInfoExtractor(YoutubeTabIE):

From bfe72723d8318f8bfcb35dee69a40758df5fa3c0 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 30 Jan 2022 00:49:55 +0530
Subject: [PATCH 005/312] Use `itertools.islice`

---
 youtube_dl/extractor/youtube.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 578cfcf90..017837e10 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3455,17 +3455,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
     _TESTS = []
 
-    def _entries(self, query, n):
-        total = 0
-        for entry in self._search_results(query, self._SEARCH_PARAMS):
-            yield entry
-            total += 1
-            if total >= n:
-                return
-
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
-        return self.playlist_result(self._entries(query, n), query)
+        entries = itertools.islice(self._search_results(query, self._SEARCH_PARAMS), 0, None if n == float('inf') else n)
+        return self.playlist_result(entries, query, query)
 
 
 class YoutubeSearchDateIE(YoutubeSearchIE):

From 2c4cb134a90b49a4d44965b57ff43cfd45ec2d69 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 30 Jan 2022 00:54:22 +0530
Subject: [PATCH 006/312] Fix max_results

---
 youtube_dl/extractor/youtube.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 017837e10..bbd3e80d8 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3453,6 +3453,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
     _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
+    _MAX_RESULTS = float('inf')
     _TESTS = []
 
     def _get_n_results(self, query, n):

From 57044eacebc6f2f3cd83c345e1b6e659a22e4773 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Thu, 28 Oct 2021 15:55:38 +0100
Subject: [PATCH 007/312] Fix test_youtube_playlist_noplaylist

---
 test/test_youtube_lists.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index cf2fdf14f..72820972e 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -9,11 +10,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import FakeYDL
 
-
 from youtube_dl.extractor import (
+    YoutubeIE,
     YoutubePlaylistIE,
     YoutubeTabIE,
-    YoutubeIE,
 )
 
 
@@ -25,9 +25,11 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist_noplaylist(self):
         dl = FakeYDL()
         dl.params['noplaylist'] = True
+        dl.params['format'] = 'best'
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
+        result = dl.extract_info(result['url'], download=False, ie_key=result.get('ie_key'), process=False)
         self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
 
     def test_youtube_course(self):

From 46e0a729b2d4503d8d49433fdddfce726d08261e Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Thu, 28 Oct 2021 15:57:10 +0100
Subject: [PATCH 008/312] Remove obsolete test_youtube_course

---
 test/test_youtube_lists.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 72820972e..e1636a1a6 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -32,16 +32,6 @@ class TestYoutubeLists(unittest.TestCase):
         result = dl.extract_info(result['url'], download=False, ie_key=result.get('ie_key'), process=False)
         self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
 
-    def test_youtube_course(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        entries = list(result['entries'])
-        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
-        self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
-
     def test_youtube_mix(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)

From 2c2c2bd348b7dce0aad55a6fc37a18c6f9a000e3 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Fri, 29 Oct 2021 03:03:00 +0100
Subject: [PATCH 009/312] Fix test_youtube_mix

---
 test/test_youtube_lists.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index e1636a1a6..fae8a950a 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -34,12 +34,14 @@ class TestYoutubeLists(unittest.TestCase):
 
     def test_youtube_mix(self):
         dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
-        entries = result['entries']
+        dl.params['format'] = 'best'
+        ie = YoutubeTabIE(dl)
+        result = dl.extract_info('https://www.youtube.com/watch?v=uVJ0Il5WvbE&list=PLhQjrBD2T381k8ul4WQ8SQ165XqY149WW',
+                                 download=False, ie_key=ie.ie_key(), process=True)
+        entries = (result or {}).get('entries', [{'id': 'not_found', }])
         self.assertTrue(len(entries) >= 50)
         original_video = entries[0]
-        self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
+        self.assertEqual(original_video['id'], 'uVJ0Il5WvbE')
 
     def test_youtube_toptracks(self):
         print('Skipping: The playlist page gives error 500')

From d76d59d99d05fba94963690a039d38373dddc658 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Fri, 29 Oct 2021 03:10:35 +0100
Subject: [PATCH 010/312] Remove obsolete non-working test_youtube_toptracks

---
 test/test_youtube_lists.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index fae8a950a..69c5d52eb 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -43,15 +43,6 @@ class TestYoutubeLists(unittest.TestCase):
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'uVJ0Il5WvbE')
 
-    def test_youtube_toptracks(self):
-        print('Skipping: The playlist page gives error 500')
-        return
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
-        entries = result['entries']
-        self.assertEqual(len(entries), 100)
-
     def test_youtube_flat_playlist_extraction(self):
         dl = FakeYDL()
         dl.params['extract_flat'] = True

From 39ca35e7651048c2adf558f1d6db2df0de4554f5 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 1 Nov 2021 04:44:57 +0000
Subject: [PATCH 011/312] Fix test_youtube_flat_playlist_extraction

---
 test/test_youtube_lists.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 69c5d52eb..07a6b6d06 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -52,7 +52,7 @@ class TestYoutubeLists(unittest.TestCase):
         entries = list(result['entries'])
         self.assertTrue(len(entries) == 1)
         video = entries[0]
-        self.assertEqual(video['_type'], 'url_transparent')
+        self.assertEqual(video['_type'], 'url')
         self.assertEqual(video['ie_key'], 'Youtube')
         self.assertEqual(video['id'], 'BaW_jenozKc')
         self.assertEqual(video['url'], 'BaW_jenozKc')

From 5f5de51a499f732a6e687f32037e130cbdc50c8f Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 1 Nov 2021 13:34:29 +0000
Subject: [PATCH 012/312] Add compat_map/filter and use the former

---
 youtube_dl/compat.py            | 21 +++++++++++++++++++++
 youtube_dl/extractor/youtube.py |  1 +
 2 files changed, 22 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 9e45c454b..29e0d3a02 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2962,6 +2962,25 @@ else:
         compat_Struct = struct.Struct
 
 
+# compat_map/filter() returning an iterator, supposedly the
+# same versioning as for zip below
+try:
+    from future_builtins import map as compat_map
+except ImportError:
+    try:
+        from itertools import imap as compat_map
+    except ImportError:
+        compat_map = map
+
+try:
+    from future_builtins import filter as compat_filter
+except ImportError:
+    try:
+        from itertools import ifilter as compat_filter
+    except ImportError:
+        compat_filter = filter
+
+
 try:
     from future_builtins import zip as compat_zip
 except ImportError:  # not 2.6+ or is 3.x
@@ -3015,6 +3034,7 @@ __all__ = [
     'compat_etree_fromstring',
     'compat_etree_register_namespace',
     'compat_expanduser',
+    'compat_filter',
     'compat_get_terminal_size',
     'compat_getenv',
     'compat_getpass',
@@ -3026,6 +3046,7 @@ __all__ = [
     'compat_integer_types',
     'compat_itertools_count',
     'compat_kwargs',
+    'compat_map',
     'compat_numeric_types',
     'compat_ord',
     'compat_os_name',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 62e58c13e..da410f8f0 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -13,6 +13,7 @@ from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_chr,
     compat_HTTPError,
+    compat_map as map,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse_unquote_plus,

From 96f87aaa3b34d80bc72097a7475d8093849091fc Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Tue, 2 Nov 2021 11:18:39 +0000
Subject: [PATCH 013/312] Back-port JS interpreter upgrade from yt-dlp PR #1437

---
 test/test_jsinterp.py  |  51 +++++
 youtube_dl/compat.py   |   5 +
 youtube_dl/jsinterp.py | 496 ++++++++++++++++++++++++++++++++---------
 3 files changed, 449 insertions(+), 103 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c24b8ca74..4d05ea610 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -112,6 +112,57 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('z'), 5)
 
+    def test_for_loop(self):
+        # function x() { a=0; for (i=0; i-10; i++) {a++} a }
+        jsi = JSInterpreter('''
+        function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+    def test_switch(self):
+        jsi = JSInterpreter('''
+        function x(f) { switch(f){
+            case 1:f+=1;
+            case 2:f+=2;
+            case 3:f+=3;break;
+            case 4:f+=4;
+            default:f=0;
+        } return f }
+        ''')
+        self.assertEqual(jsi.call_function('x', 1), 7)
+        self.assertEqual(jsi.call_function('x', 3), 6)
+        self.assertEqual(jsi.call_function('x', 5), 0)
+
+    def test_try(self):
+        jsi = JSInterpreter('''
+        function x() { try{return 10} catch(e){return 5} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+    def test_for_loop_continue(self):
+        jsi = JSInterpreter('''
+        function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+    def test_for_loop_break(self):
+        jsi = JSInterpreter('''
+        function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+    def test_literal_list(self):
+        jsi = JSInterpreter('''
+        function x() { [1, 2, "asdf", [5, 6, 7]][3] }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [5, 6, 7])
+
+    def test_comma(self):
+        jsi = JSInterpreter('''
+        function x() { a=5; a -= 1, a+=3; return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 7)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 29e0d3a02..2004a405a 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,10 @@ import subprocess
 import sys
 import xml.etree.ElementTree
 
+try:
+    import collections.abc as compat_collections_abc
+except ImportError:
+    import collections as compat_collections_abc
 
 try:
     import urllib.request as compat_urllib_request
@@ -3025,6 +3029,7 @@ __all__ = [
     'compat_b64decode',
     'compat_basestring',
     'compat_chr',
+    'compat_collections_abc',
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookies',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 7bda59610..061e92c2a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -8,6 +8,15 @@ from .utils import (
     ExtractorError,
     remove_quotes,
 )
+from .compat import (
+    compat_collections_abc
+)
+MutableMapping = compat_collections_abc.MutableMapping
+
+
+class Nonlocal:
+    pass
+
 
 _OPERATORS = [
     ('|', operator.or_),
@@ -22,11 +31,55 @@ _OPERATORS = [
     ('*', operator.mul),
 ]
 _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
 
 _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
 
 
+class JS_Break(ExtractorError):
+    def __init__(self):
+        ExtractorError.__init__(self, 'Invalid break')
+
+
+class JS_Continue(ExtractorError):
+    def __init__(self):
+        ExtractorError.__init__(self, 'Invalid continue')
+
+
+class LocalNameSpace(MutableMapping):
+    def __init__(self, *stack):
+        self.stack = tuple(stack)
+
+    def __getitem__(self, key):
+        for scope in self.stack:
+            if key in scope:
+                return scope[key]
+        raise KeyError(key)
+
+    def __setitem__(self, key, value):
+        for scope in self.stack:
+            if key in scope:
+                scope[key] = value
+                break
+        else:
+            self.stack[0][key] = value
+        return value
+
+    def __delitem__(self, key):
+        raise NotImplementedError('Deleting is not supported')
+
+    def __iter__(self):
+        for scope in self.stack:
+            for scope_item in iter(scope):
+                yield scope_item
+
+    def __len__(self, key):
+        return len(iter(self))
+
+    def __repr__(self):
+        return 'LocalNameSpace%s' % (self.stack, )
+
+
 class JSInterpreter(object):
     def __init__(self, code, objects=None):
         if objects is None:
@@ -34,11 +87,58 @@ class JSInterpreter(object):
         self.code = code
         self._functions = {}
         self._objects = objects
+        self.__named_object_counter = 0
+
+    def _named_object(self, namespace, obj):
+        self.__named_object_counter += 1
+        name = '__youtube_dl_jsinterp_obj%s' % (self.__named_object_counter, )
+        namespace[name] = obj
+        return name
+
+    @staticmethod
+    def _separate(expr, delim=',', max_split=None):
+        if not expr:
+            return
+        parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0}
+        start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1
+        for idx, char in enumerate(expr):
+            if char in parens:
+                parens[char] += 1
+            is_in_parens = (parens['['] - parens[']']
+                            or parens['('] - parens[')']
+                            or parens['{'] - parens['}'])
+            if char == delim[pos] and not is_in_parens:
+                if pos == max_pos:
+                    pos = 0
+                    yield expr[start: idx - max_pos]
+                    start = idx + 1
+                    splits += 1
+                    if max_split and splits >= max_split:
+                        break
+                else:
+                    pos += 1
+            else:
+                pos = 0
+        yield expr[start:]
+
+    @staticmethod
+    def _separate_at_paren(expr, delim):
+        separated = list(JSInterpreter._separate(expr, delim, 1))
+        if len(separated) < 2:
+            raise ExtractorError('No terminating paren {0} in {1}'.format(delim, expr))
+        return separated[0][1:].strip(), separated[1].strip()
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')
 
+        sub_statements = list(self._separate(stmt, ';'))
+        stmt = (sub_statements or ['']).pop()
+        for sub_stmt in sub_statements:
+            ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
+            if should_abort:
+                return ret
+
         should_abort = False
         stmt = stmt.lstrip()
         stmt_m = re.match(r'var\s', stmt)
@@ -61,25 +161,119 @@ class JSInterpreter(object):
         if expr == '':  # Empty expression
             return None
 
-        if expr.startswith('('):
-            parens_count = 0
-            for m in re.finditer(r'[()]', expr):
-                if m.group(0) == '(':
-                    parens_count += 1
-                else:
-                    parens_count -= 1
-                    if parens_count == 0:
-                        sub_expr = expr[1:m.start()]
-                        sub_result = self.interpret_expression(
-                            sub_expr, local_vars, allow_recursion)
-                        remaining_expr = expr[m.end():].strip()
-                        if not remaining_expr:
-                            return sub_result
-                        else:
-                            expr = json.dumps(sub_result) + remaining_expr
-                        break
+        if expr.startswith('{'):
+            inner, outer = self._separate_at_paren(expr, '}')
+            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
+            if not outer or should_abort:
+                return inner
             else:
-                raise ExtractorError('Premature end of parens in %r' % expr)
+                expr = json.dumps(inner) + outer
+
+        if expr.startswith('('):
+            inner, outer = self._separate_at_paren(expr, ')')
+            inner = self.interpret_expression(inner, local_vars, allow_recursion)
+            if not outer:
+                return inner
+            else:
+                expr = json.dumps(inner) + outer
+
+        if expr.startswith('['):
+            inner, outer = self._separate_at_paren(expr, ']')
+            name = self._named_object(local_vars, [
+                self.interpret_expression(item, local_vars, allow_recursion)
+                for item in self._separate(inner)])
+            expr = name + outer
+
+        m = re.match(r'try\s*', expr)
+        if m:
+            if expr[m.end()] == '{':
+                try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
+            else:
+                try_expr, expr = expr[m.end() - 1:], ''
+            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
+            if should_abort:
+                return ret
+            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+
+        m = re.match(r'(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+        md = m.groupdict() if m else {}
+        if md.get('catch'):
+            # We ignore the catch block
+            _, expr = self._separate_at_paren(expr, '}')
+            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+
+        elif md.get('for'):
+            def raise_constructor_error(c):
+                raise ExtractorError(
+                    'Premature return in the initialization of a for loop in {0!r}'.format(c))
+
+            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            if remaining.startswith('{'):
+                body, expr = self._separate_at_paren(remaining, '}')
+            else:
+                m = re.match(r'switch\s*\(', remaining)  # FIXME
+                if m:
+                    switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+                    body, expr = self._separate_at_paren(remaining, '}')
+                    body = 'switch(%s){%s}' % (switch_val, body)
+                else:
+                    body, expr = remaining, ''
+            start, cndn, increment = self._separate(constructor, ';')
+            if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
+                raise_constructor_error(constructor)
+            while True:
+                if not self.interpret_expression(cndn, local_vars, allow_recursion):
+                    break
+                try:
+                    ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1)
+                    if should_abort:
+                        return ret
+                except JS_Break:
+                    break
+                except JS_Continue:
+                    pass
+                if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]:
+                    raise_constructor_error(constructor)
+            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+
+        elif md.get('switch'):
+            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
+            body, expr = self._separate_at_paren(remaining, '}')
+            body, default = body.split('default:') if 'default:' in body else (body, None)
+            items = body.split('case ')[1:]
+            if default:
+                items.append('default:%s' % (default, ))
+            matched = False
+            for item in items:
+                case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
+                matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion)
+                if matched:
+                    try:
+                        ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
+                        if should_abort:
+                            return ret
+                    except JS_Break:
+                        break
+            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+
+        # Comma separated statements
+        sub_expressions = list(self._separate(expr))
+        expr = sub_expressions.pop().strip() if sub_expressions else ''
+        for sub_expr in sub_expressions:
+            self.interpret_expression(sub_expr, local_vars, allow_recursion)
+
+        for m in re.finditer(r'''(?x)
+                (?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
+                (?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)''' % globals(), expr):
+            var = m.group('var1') or m.group('var2')
+            start, end = m.span()
+            sign = m.group('pre_sign') or m.group('post_sign')
+            ret = local_vars[var]
+            local_vars[var] += 1 if sign[0] == '+' else -1
+            if m.group('pre_sign'):
+                ret = local_vars[var]
+            expr = expr[:start] + json.dumps(ret) + expr[end:]
 
         for op, opfunc in _ASSIGN_OPERATORS:
             m = re.match(r'''(?x)
@@ -88,14 +282,13 @@ class JSInterpreter(object):
                 (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
             if not m:
                 continue
-            right_val = self.interpret_expression(
-                m.group('expr'), local_vars, allow_recursion - 1)
+            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
 
             if m.groupdict().get('index'):
                 lvar = local_vars[m.group('out')]
-                idx = self.interpret_expression(
-                    m.group('index'), local_vars, allow_recursion)
-                assert isinstance(idx, int)
+                idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+                if not isinstance(idx, int):
+                    raise ExtractorError('List indices must be integers: %s' % (idx, ))
                 cur = lvar[idx]
                 val = opfunc(cur, right_val)
                 lvar[idx] = val
@@ -109,8 +302,13 @@ class JSInterpreter(object):
         if expr.isdigit():
             return int(expr)
 
+        if expr == 'break':
+            raise JS_Break()
+        elif expr == 'continue':
+            raise JS_Continue()
+
         var_m = re.match(
-            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+            r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
             expr)
         if var_m:
             return local_vars[var_m.group('name')]
@@ -124,91 +322,161 @@ class JSInterpreter(object):
             r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
         if m:
             val = local_vars[m.group('in')]
-            idx = self.interpret_expression(
-                m.group('idx'), local_vars, allow_recursion - 1)
+            idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
             return val[idx]
 
+        def raise_expr_error(where, op, exp):
+            raise ExtractorError('Premature {0} return of {1} in {2!r}'.format(where, op, exp))
+
+        for op, opfunc in _OPERATORS:
+            separated = list(self._separate(expr, op))
+            if len(separated) < 2:
+                continue
+            right_val = separated.pop()
+            left_val = op.join(separated)
+            left_val, should_abort = self.interpret_statement(
+                left_val, local_vars, allow_recursion - 1)
+            if should_abort:
+                raise_expr_error('left-side', op, expr)
+            right_val, should_abort = self.interpret_statement(
+                right_val, local_vars, allow_recursion - 1)
+            if should_abort:
+                raise_expr_error('right-side', op, expr)
+            return opfunc(left_val or 0, right_val)
+
         m = re.match(
-            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
+            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
             expr)
         if m:
             variable = m.group('var')
-            member = remove_quotes(m.group('member') or m.group('member2'))
-            arg_str = m.group('args')
+            nl = Nonlocal()
 
-            if variable in local_vars:
-                obj = local_vars[variable]
+            nl.member = remove_quotes(m.group('member') or m.group('member2'))
+            arg_str = expr[m.end():]
+            if arg_str.startswith('('):
+                arg_str, remaining = self._separate_at_paren(arg_str, ')')
             else:
-                if variable not in self._objects:
-                    self._objects[variable] = self.extract_object(variable)
-                obj = self._objects[variable]
+                arg_str, remaining = None, arg_str
 
-            if arg_str is None:
-                # Member access
-                if member == 'length':
-                    return len(obj)
-                return obj[member]
+            def assertion(cndn, msg):
+                """ assert, but without risk of getting optimized out """
+                if not cndn:
+                    raise ExtractorError('{0} {1}: {2}'.format(nl.member, msg, expr))
 
-            assert expr.endswith(')')
-            # Function call
-            if arg_str == '':
-                argvals = tuple()
-            else:
-                argvals = tuple([
+            def eval_method():
+                # nonlocal member
+                member = nl.member
+                if variable == 'String':
+                    obj = str
+                elif variable in local_vars:
+                    obj = local_vars[variable]
+                else:
+                    if variable not in self._objects:
+                        self._objects[variable] = self.extract_object(variable)
+                    obj = self._objects[variable]
+
+                if arg_str is None:
+                    # Member access
+                    if member == 'length':
+                        return len(obj)
+                    return obj[member]
+
+                # Function call
+                argvals = [
                     self.interpret_expression(v, local_vars, allow_recursion)
-                    for v in arg_str.split(',')])
+                    for v in self._separate(arg_str)]
 
-            if member == 'split':
-                assert argvals == ('',)
-                return list(obj)
-            if member == 'join':
-                assert len(argvals) == 1
-                return argvals[0].join(obj)
-            if member == 'reverse':
-                assert len(argvals) == 0
-                obj.reverse()
-                return obj
-            if member == 'slice':
-                assert len(argvals) == 1
-                return obj[argvals[0]:]
-            if member == 'splice':
-                assert isinstance(obj, list)
-                index, howMany = argvals
-                res = []
-                for i in range(index, min(index + howMany, len(obj))):
-                    res.append(obj.pop(index))
-                return res
+                if obj == str:
+                    if member == 'fromCharCode':
+                        assertion(argvals, 'takes one or more arguments')
+                        return ''.join(map(chr, argvals))
+                    raise ExtractorError('Unsupported string method %s' % (member, ))
 
-            return obj[member](argvals)
+                if member == 'split':
+                    assertion(argvals, 'takes one or more arguments')
+                    assertion(argvals == [''], 'with arguments is not implemented')
+                    return list(obj)
+                elif member == 'join':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(len(argvals) == 1, 'takes exactly one argument')
+                    return argvals[0].join(obj)
+                elif member == 'reverse':
+                    assertion(not argvals, 'does not take any arguments')
+                    obj.reverse()
+                    return obj
+                elif member == 'slice':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(len(argvals) == 1, 'takes exactly one argument')
+                    return obj[argvals[0]:]
+                elif member == 'splice':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(argvals, 'takes one or more arguments')
+                    index, howMany = (argvals + [len(obj)])[:2]
+                    if index < 0:
+                        index += len(obj)
+                    add_items = argvals[2:]
+                    res = []
+                    for i in range(index, min(index + howMany, len(obj))):
+                        res.append(obj.pop(index))
+                    for i, item in enumerate(add_items):
+                        obj.insert(index + i, item)
+                    return res
+                elif member == 'unshift':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(argvals, 'takes one or more arguments')
+                    for item in reversed(argvals):
+                        obj.insert(0, item)
+                    return obj
+                elif member == 'pop':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(not argvals, 'does not take any arguments')
+                    if not obj:
+                        return
+                    return obj.pop()
+                elif member == 'push':
+                    assertion(argvals, 'takes one or more arguments')
+                    obj.extend(argvals)
+                    return obj
+                elif member == 'forEach':
+                    assertion(argvals, 'takes one or more arguments')
+                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
+                    f, this = (argvals + [''])[:2]
+                    return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)]
+                elif member == 'indexOf':
+                    assertion(argvals, 'takes one or more arguments')
+                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
+                    idx, start = (argvals + [0])[:2]
+                    try:
+                        return obj.index(idx, start)
+                    except ValueError:
+                        return -1
 
-        for op, opfunc in _OPERATORS:
-            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
-            if not m:
-                continue
-            x, abort = self.interpret_statement(
-                m.group('x'), local_vars, allow_recursion - 1)
-            if abort:
-                raise ExtractorError(
-                    'Premature left-side return of %s in %r' % (op, expr))
-            y, abort = self.interpret_statement(
-                m.group('y'), local_vars, allow_recursion - 1)
-            if abort:
-                raise ExtractorError(
-                    'Premature right-side return of %s in %r' % (op, expr))
-            return opfunc(x, y)
+                if isinstance(obj, list):
+                    member = int(member)
+                    nl.member = member
+                return obj[member](argvals)
 
-        m = re.match(
-            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
+            if remaining:
+                return self.interpret_expression(
+                    self._named_object(local_vars, eval_method()) + remaining,
+                    local_vars, allow_recursion)
+            else:
+                return eval_method()
+
+        m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
         if m:
             fname = m.group('func')
             argvals = tuple([
                 int(v) if v.isdigit() else local_vars[v]
-                for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
-            if fname not in self._functions:
+                for v in self._separate(m.group('args'))])
+            if fname in local_vars:
+                return local_vars[fname](argvals)
+            elif fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
             return self._functions[fname](argvals)
 
-        raise ExtractorError('Unsupported JS expression %r' % expr)
+        if expr:
+            raise ExtractorError('Unsupported JS expression %r' % expr)
 
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
@@ -233,30 +501,52 @@ class JSInterpreter(object):
 
         return obj
 
-    def extract_function(self, funcname):
+    def extract_function_code(self, funcname):
+        """ @returns argnames, code """
         func_m = re.search(
             r'''(?x)
-                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+                (?:function\s+%(f_n)s|[{;,]\s*%(f_n)s\s*=\s*function|var\s+%(f_n)s\s*=\s*function)\s*
                 \((?P<args>[^)]*)\)\s*
-                \{(?P<code>[^}]+)\}''' % (
-                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+                (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % {'f_n': re.escape(funcname), },
             self.code)
+        code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
         if func_m is None:
             raise ExtractorError('Could not find JS function %r' % funcname)
-        argnames = func_m.group('args').split(',')
+        return func_m.group('args').split(','), code
 
-        return self.build_function(argnames, func_m.group('code'))
+    def extract_function(self, funcname):
+        return self.extract_function_from_code(*self.extract_function_code(funcname))
+
+    def extract_function_from_code(self, argnames, code, *global_stack):
+        local_vars = {}
+        while True:
+            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+            if mobj is None:
+                break
+            start, body_start = mobj.span()
+            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+            name = self._named_object(
+                local_vars,
+                self.extract_function_from_code(
+                    [str.strip(x) for x in mobj.group('args').split(',')],
+                    body, local_vars, *global_stack))
+            code = code[:start] + name + remaining
+        return self.build_function(argnames, code, local_vars, *global_stack)
 
     def call_function(self, funcname, *args):
-        f = self.extract_function(funcname)
-        return f(args)
+        return self.extract_function(funcname)(args)
 
-    def build_function(self, argnames, code):
-        def resf(args):
-            local_vars = dict(zip(argnames, args))
-            for stmt in code.split(';'):
-                res, abort = self.interpret_statement(stmt, local_vars)
-                if abort:
+    def build_function(self, argnames, code, *global_stack):
+        global_stack = list(global_stack) or [{}]
+        local_vars = global_stack.pop(0)
+
+        def resf(args, **kwargs):
+            local_vars.update(dict(zip(argnames, args)))
+            local_vars.update(kwargs)
+            var_stack = LocalNameSpace(local_vars, *global_stack)
+            for stmt in self._separate(code.replace('\n', ''), ';'):
+                ret, should_abort = self.interpret_statement(stmt, var_stack)
+                if should_abort:
                     break
-            return res
+            return ret
         return resf

From e1eae16b56b5c57e341b000167c0a92e67095e6e Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Thu, 4 Nov 2021 12:48:06 +0000
Subject: [PATCH 014/312] Handle default in switch better

Add https://github.com/yt-dlp/yt-dlp/commit/a1fc7ca0743c8df06416e68ee74b64e07dfe7135
Thanks coletdjnz
---
 test/test_jsinterp.py  | 15 +++++++++++++++
 youtube_dl/jsinterp.py | 23 ++++++++++++++---------
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 4d05ea610..acdabffb1 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -133,6 +133,21 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('x', 3), 6)
         self.assertEqual(jsi.call_function('x', 5), 0)
 
+    def test_switch_default(self):
+        jsi = JSInterpreter('''
+        function x(f) { switch(f){
+            case 2: f+=2;
+            default: f-=1;
+            case 5:
+            case 6: f+=6;
+            case 0: break;
+            case 1: f+=1;
+        } return f }
+        ''')
+        self.assertEqual(jsi.call_function('x', 1), 2)
+        self.assertEqual(jsi.call_function('x', 5), 11)
+        self.assertEqual(jsi.call_function('x', 9), 14)
+
     def test_try(self):
         jsi = JSInterpreter('''
         function x() { try{return 10} catch(e){return 5} }
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 061e92c2a..c35765702 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -240,21 +240,26 @@ class JSInterpreter(object):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
             body, expr = self._separate_at_paren(remaining, '}')
-            body, default = body.split('default:') if 'default:' in body else (body, None)
-            items = body.split('case ')[1:]
-            if default:
-                items.append('default:%s' % (default, ))
-            matched = False
-            for item in items:
-                case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
-                matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion)
-                if matched:
+            items = body.replace('default:', 'case default:').split('case ')[1:]
+            for default in (False, True):
+                matched = False
+                for item in items:
+                    case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
+                    if default:
+                        matched = matched or case == 'default'
+                    elif not matched:
+                        matched = (case != 'default'
+                                   and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
+                    if not matched:
+                        continue
                     try:
                         ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
                         if should_abort:
                             return ret
                     except JS_Break:
                         break
+                if matched:
+                    break
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
 
         # Comma separated statements

From 1ca673bd98cc5bbfa76d00ac84ad5f6c1376db01 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Nov 2021 02:06:13 +0000
Subject: [PATCH 015/312] Fix splice to handle float

Needed for new youtube js player f1ca6900
Add https://github.com/yt-dlp/yt-dlp/commit/57dbe8077f8d00e0fffac53669f40cd7d584474f#diff-729b57caa8d006426f6a8960c061f519a8b6658682284015e069745af52ffb07
---
 youtube_dl/jsinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c35765702..c75cf45b9 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -416,7 +416,7 @@ class JSInterpreter(object):
                 elif member == 'splice':
                     assertion(isinstance(obj, list), 'must be applied on a list')
                     assertion(argvals, 'takes one or more arguments')
-                    index, howMany = (argvals + [len(obj)])[:2]
+                    index, howMany = map(int, (argvals + [len(obj)])[:2])
                     if index < 0:
                         index += len(obj)
                     add_items = argvals[2:]

From 9d142109f445ea247e476cfc0e0ca134f6ebb802 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Nov 2021 03:18:29 +0000
Subject: [PATCH 016/312] Back-port test_youtube_signature.py from yt-dlp and
 fix JSInterp accordingly

---
 test/test_youtube_signature.py | 89 ++++++++++++++++++++++++----------
 youtube_dl/jsinterp.py         |  9 ++--
 2 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 627d4cb92..c8e85b500 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -14,9 +14,10 @@ import string
 
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
+from youtube_dl.jsinterp import JSInterpreter
 from youtube_dl.compat import compat_str, compat_urlretrieve
 
-_TESTS = [
+_SIG_TESTS = [
     (
         'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
         86,
@@ -64,6 +65,25 @@ _TESTS = [
     )
 ]
 
+_NSIG_TESTS = [
+    (
+        'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
+        'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
+    ),
+    (
+        'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
+        'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
+    ),
+    (
+        'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
+        'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
+    ),
+    (
+        'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
+        'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
+    ),
+]
+
 
 class TestPlayerInfo(unittest.TestCase):
     def test_youtube_extract_player_info(self):
@@ -95,35 +115,54 @@ class TestSignature(unittest.TestCase):
             os.mkdir(self.TESTDATA_DIR)
 
 
-def make_tfunc(url, sig_input, expected_sig):
-    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
-    assert m, '%r should follow URL format' % url
-    test_id = m.group(1)
+def t_factory(name, sig_func, url_pattern):
+    def make_tfunc(url, sig_input, expected_sig):
+        m = url_pattern.match(url)
+        assert m, '%r should follow URL format' % url
+        test_id = m.group('id')
 
-    def test_func(self):
-        basename = 'player-%s.js' % test_id
-        fn = os.path.join(self.TESTDATA_DIR, basename)
+        def test_func(self):
+            basename = 'player-{0}-{1}.js'.format(name, test_id)
+            fn = os.path.join(self.TESTDATA_DIR, basename)
 
-        if not os.path.exists(fn):
-            compat_urlretrieve(url, fn)
+            if not os.path.exists(fn):
+                compat_urlretrieve(url, fn)
+            with io.open(fn, encoding='utf-8') as testf:
+                jscode = testf.read()
+            self.assertEqual(sig_func(jscode, sig_input), expected_sig)
 
-        ydl = FakeYDL()
-        ie = YoutubeIE(ydl)
-        with io.open(fn, encoding='utf-8') as testf:
-            jscode = testf.read()
-        func = ie._parse_sig_js(jscode)
-        src_sig = (
-            compat_str(string.printable[:sig_input])
-            if isinstance(sig_input, int) else sig_input)
-        got_sig = func(src_sig)
-        self.assertEqual(got_sig, expected_sig)
-
-    test_func.__name__ = str('test_signature_js_' + test_id)
-    setattr(TestSignature, test_func.__name__, test_func)
+        test_func.__name__ = str('test_{0}_js_{1}'.format(name, test_id))
+        setattr(TestSignature, test_func.__name__, test_func)
+    return make_tfunc
 
 
-for test_spec in _TESTS:
-    make_tfunc(*test_spec)
+def signature(jscode, sig_input):
+    func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+    src_sig = (
+        compat_str(string.printable[:sig_input])
+        if isinstance(sig_input, int) else sig_input)
+    return func(src_sig)
+
+
+def n_sig(jscode, sig_input):
+    # Pending implementation of _extract_n_function_name() or similar in
+    # youtube.py, hard-code here
+    # funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
+    import re
+    funcname = re.search(r'[=(,&|](\w+)\(\w+\),\w+\.set\("n",', jscode)
+    funcname = funcname and funcname.group(1)
+    return JSInterpreter(jscode).call_function(funcname, sig_input)
+
+
+make_sig_test = t_factory(
+    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+for test_spec in _SIG_TESTS:
+    make_sig_test(*test_spec)
+
+make_nsig_test = t_factory(
+    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
+for test_spec in _NSIG_TESTS:
+    make_nsig_test(*test_spec)
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c75cf45b9..a2306557b 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -9,7 +9,8 @@ from .utils import (
     remove_quotes,
 )
 from .compat import (
-    compat_collections_abc
+    compat_collections_abc,
+    compat_str,
 )
 MutableMapping = compat_collections_abc.MutableMapping
 
@@ -372,7 +373,7 @@ class JSInterpreter(object):
                 # nonlocal member
                 member = nl.member
                 if variable == 'String':
-                    obj = str
+                    obj = compat_str
                 elif variable in local_vars:
                     obj = local_vars[variable]
                 else:
@@ -391,7 +392,7 @@ class JSInterpreter(object):
                     self.interpret_expression(v, local_vars, allow_recursion)
                     for v in self._separate(arg_str)]
 
-                if obj == str:
+                if obj == compat_str:
                     if member == 'fromCharCode':
                         assertion(argvals, 'takes one or more arguments')
                         return ''.join(map(chr, argvals))
@@ -533,7 +534,7 @@ class JSInterpreter(object):
             name = self._named_object(
                 local_vars,
                 self.extract_function_from_code(
-                    [str.strip(x) for x in mobj.group('args').split(',')],
+                    [x.strip() for x in mobj.group('args').split(',')],
                     body, local_vars, *global_stack))
             code = code[:start] + name + remaining
         return self.build_function(argnames, code, local_vars, *global_stack)

From 6ca7b776965ed1e9220690edc4ee22de8c8587f5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 10 Dec 2021 19:14:54 +0000
Subject: [PATCH 017/312] Refactor JSInterpreter._separate

yt-dlp/yt-dlp/@06dfe0a, improve _MATCHING_PARENS
---
 youtube_dl/jsinterp.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a2306557b..8eaa911cd 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -36,6 +36,8 @@ _ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
 
 _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
 
+_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
+
 
 class JS_Break(ExtractorError):
     def __init__(self):
@@ -100,26 +102,24 @@ class JSInterpreter(object):
     def _separate(expr, delim=',', max_split=None):
         if not expr:
             return
-        parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0}
-        start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1
+        counters = {k: 0 for k in _MATCHING_PARENS.values()}
+        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         for idx, char in enumerate(expr):
-            if char in parens:
-                parens[char] += 1
-            is_in_parens = (parens['['] - parens[']']
-                            or parens['('] - parens[')']
-                            or parens['{'] - parens['}'])
-            if char == delim[pos] and not is_in_parens:
-                if pos == max_pos:
-                    pos = 0
-                    yield expr[start: idx - max_pos]
-                    start = idx + 1
-                    splits += 1
-                    if max_split and splits >= max_split:
-                        break
-                else:
-                    pos += 1
-            else:
+            if char in _MATCHING_PARENS:
+                counters[_MATCHING_PARENS[char]] += 1
+            elif char in counters:
+                counters[char] -= 1
+            if char != delim[pos] or any(counters.values()):
                 pos = 0
+                continue
+            elif pos != delim_len:
+                pos += 1
+                continue
+            yield expr[start: idx - delim_len]
+            start, pos = idx + 1, 0
+            splits += 1
+            if max_split and splits >= max_split:
+                break
         yield expr[start:]
 
     @staticmethod

From af9e72507ea38e5ab3fa2751ed09ec88021260cb Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 1 Nov 2021 04:45:42 +0000
Subject: [PATCH 018/312] Implement n-param descrambling using JSInterp

Fixes #29326, closes #29790, closes #30004, closes #30024, closes #30052,
closes #30088, closes #30097, closes #30102, closes #30109, closes #30119,
closes #30125, closes #30128, closes #30162, closes #30173, closes #30186,
closes #30192, closes #30221, closes #30239, closes #30539, closes #30552.
---
 youtube_dl/extractor/youtube.py | 115 +++++++++++++++++++++++++++-----
 1 file changed, 99 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index da410f8f0..63918924d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1254,6 +1254,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot identify player %r' % player_url)
         return id_m.group('id')
 
+    def _get_player_code(self, video_id, player_url, player_id=None):
+        if not player_id:
+            player_id = self._extract_player_info(player_url)
+
+        if player_id not in self._code_cache:
+            self._code_cache[player_id] = self._download_webpage(
+                player_url, video_id,
+                note='Downloading player ' + player_id,
+                errnote='Download of %s failed' % player_url)
+        return self._code_cache[player_id]
+
     def _extract_signature_function(self, video_id, player_url, example_sig):
         player_id = self._extract_player_info(player_url)
 
@@ -1266,12 +1277,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if cache_spec is not None:
             return lambda s: ''.join(s[i] for i in cache_spec)
 
-        if player_id not in self._code_cache:
-            self._code_cache[player_id] = self._download_webpage(
-                player_url, video_id,
-                note='Downloading player ' + player_id,
-                errnote='Download of %s failed' % player_url)
-        code = self._code_cache[player_id]
+        code = self._get_player_code(video_id, player_url, player_id)
         res = self._parse_sig_js(code)
 
         test_string = ''.join(map(compat_chr, range(len(example_sig))))
@@ -1350,11 +1356,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if player_url is None:
             raise ExtractorError('Cannot decrypt signature without player_url')
 
-        if player_url.startswith('//'):
-            player_url = 'https:' + player_url
-        elif not re.match(r'https?://', player_url):
-            player_url = compat_urlparse.urljoin(
-                'https://www.youtube.com', player_url)
         try:
             player_id = (player_url, self._signature_cache_id(s))
             if player_id not in self._player_cache:
@@ -1371,6 +1372,88 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(
                 'Signature extraction failed: ' + tb, cause=e)
 
+    def _extract_player_url(self, webpage):
+        player_url = self._search_regex(
+            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
+            webpage or '', 'player URL', fatal=False)
+        if not player_url:
+            return
+        if player_url.startswith('//'):
+            player_url = 'https:' + player_url
+        elif not re.match(r'https?://', player_url):
+            player_url = compat_urlparse.urljoin(
+                'https://www.youtube.com', player_url)
+        return player_url
+
+    # from yt-dlp
+    # See also:
+    # 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
+    # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
+    # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
+    def _extract_n_function_name(self, jscode):
+        return self._search_regex(
+            (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
+            jscode, 'Initial JS player n function name', group='nfunc')
+
+    def _extract_n_function(self, video_id, player_url):
+        player_id = self._extract_player_info(player_url)
+        func_code = self._downloader.cache.load('youtube-nsig', player_id)
+
+        if func_code:
+            jsi = JSInterpreter(func_code)
+        else:
+            player_id = self._extract_player_info(player_url)
+            jscode = self._get_player_code(video_id, player_url, player_id)
+            funcname = self._extract_n_function_name(jscode)
+            jsi = JSInterpreter(jscode)
+            func_code = jsi.extract_function_code(funcname)
+            self._downloader.cache.store('youtube-nsig', player_id, func_code)
+
+        if self._downloader.params.get('youtube_print_sig_code'):
+            self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))
+
+        return lambda s: jsi.extract_function_from_code(*func_code)([s])
+
+    def _n_descramble(self, n_param, player_url, video_id):
+        """Compute the response to YT's "n" parameter challenge
+
+        Args:
+        n_param     -- challenge string that is the value of the
+                       URL's "n" query parameter
+        player_url  -- URL of YT player JS
+        video_id
+        """
+
+        sig_id = ('nsig_value', n_param)
+        if sig_id in self._player_cache:
+            return self._player_cache[sig_id]
+
+        try:
+            player_id = ('nsig', player_url)
+            if player_id not in self._player_cache:
+                self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
+            func = self._player_cache[player_id]
+            self._player_cache[sig_id] = func(n_param)
+            if self._downloader.params.get('verbose', False):
+                self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
+            return self._player_cache[sig_id]
+        except Exception as e:
+            raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
+
+    def _unthrottle_format_urls(self, video_id, player_url, formats):
+        for fmt in formats:
+            parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
+            qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
+            n_param = qs.get('n')
+            if not n_param:
+                continue
+            n_param = n_param[-1]
+            n_response = self._n_descramble(n_param, player_url, video_id)
+            if n_response:
+                qs['n'] = [n_response]
+                fmt['url'] = compat_urlparse.urlunparse(
+                    parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
     def _mark_watched(self, video_id, player_response):
         playback_url = url_or_none(try_get(
             player_response,
@@ -1632,11 +1715,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if not (sc and fmt_url and encrypted_sig):
                     continue
                 if not player_url:
-                    if not webpage:
-                        continue
-                    player_url = self._search_regex(
-                        r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
-                        webpage, 'player URL', fatal=False)
+                    player_url = self._extract_player_url(webpage)
                 if not player_url:
                     continue
                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
@@ -1782,6 +1861,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         is_live = video_details.get('isLive')
         owner_profile_url = microformat.get('ownerProfileUrl')
 
+        if not player_url:
+            player_url = self._extract_player_url(webpage)
+        self._unthrottle_format_urls(video_id, player_url, formats)
+
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,

From 1e677567cd083d43f55daef0cc74e5fa24575ae3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Feb 2022 14:39:03 +0000
Subject: [PATCH 019/312] [YouTube] Fix n-sig for player e06dea74 (#30582)

From yt-dl commit 48416bc
---
 test/test_youtube_signature.py  | 24 +++++++++++++++++-------
 youtube_dl/extractor/youtube.py | 14 +++++++++++---
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index c8e85b500..fc5e9828e 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -82,6 +82,14 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
         'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
     ),
+    (
+        'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
+        'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
+    ),
+    (
+        'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
+        'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
+    ),
 ]
 
 
@@ -110,10 +118,17 @@ class TestPlayerInfo(unittest.TestCase):
 class TestSignature(unittest.TestCase):
     def setUp(self):
         TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
         if not os.path.exists(self.TESTDATA_DIR):
             os.mkdir(self.TESTDATA_DIR)
 
+    def tearDown(self):
+        try:
+            for f in os.listdir(self.TESTDATA_DIR):
+                os.remove(f)
+        except OSError:
+            pass
+
 
 def t_factory(name, sig_func, url_pattern):
     def make_tfunc(url, sig_input, expected_sig):
@@ -145,12 +160,7 @@ def signature(jscode, sig_input):
 
 
 def n_sig(jscode, sig_input):
-    # Pending implementation of _extract_n_function_name() or similar in
-    # youtube.py, hard-code here
-    # funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
-    import re
-    funcname = re.search(r'[=(,&|](\w+)\(\w+\),\w+\.set\("n",', jscode)
-    funcname = funcname and funcname.group(1)
+    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
     return JSInterpreter(jscode).call_function(funcname, sig_input)
 
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 63918924d..7943b94f9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -28,6 +28,7 @@ from ..utils import (
     dict_get,
     float_or_none,
     int_or_none,
+    js_to_json,
     mimetype2ext,
     parse_codecs,
     parse_duration,
@@ -1391,9 +1392,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
     # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
     def _extract_n_function_name(self, jscode):
-        return self._search_regex(
-            (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
-            jscode, 'Initial JS player n function name', group='nfunc')
+        target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
+        nfunc_and_idx = self._search_regex(
+            r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
+            jscode, 'Initial JS player n function name')
+        nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
+        if not idx:
+            return nfunc
+        return self._parse_json(self._search_regex(
+            r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
+            'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)

From 34c06b16f5eb814308392b68dce07bbff62bc406 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Jan 2022 00:02:56 +0000
Subject: [PATCH 020/312] Support Youtube Shorts URL format

---
 youtube_dl/extractor/youtube.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7943b94f9..05688dc70 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -417,6 +417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
+                             |shorts/
                              |(?:                                             # or the v= param in all its forms
                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
@@ -1119,6 +1120,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # YT 'Shorts'
+            'url': 'https://youtube.com/shorts/4L2J27mJ3Dc',
+            'info_dict': {
+                'id': '4L2J27mJ3Dc',
+                'ext': 'mp4',
+                'upload_date': '20211025',
+                'uploader': 'Charlie Berens',
+                'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
+                'uploader_id': 'fivedlrmilkshake',
+                'title': 'Midwest Squid Game #Shorts',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
     _formats = {
         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

From 41f0043983c831b7c0c3614340d2f66ec153087b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Feb 2022 23:22:57 +0000
Subject: [PATCH 021/312] Avoid crashing if n-sig decode fails

---
 youtube_dl/extractor/youtube.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 05688dc70..4165de15c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -26,6 +26,7 @@ from ..utils import (
     ExtractorError,
     clean_html,
     dict_get,
+    error_to_compat_str,
     float_or_none,
     int_or_none,
     js_to_json,
@@ -1463,7 +1464,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
             return self._player_cache[sig_id]
         except Exception as e:
-            raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
+            self._downloader.report_warning(
+                '[%s] %s (%s %s)' % (
+                    self.IE_NAME,
+                    'Unable to decode n-parameter: download likely to be throttled',
+                    error_to_compat_str(e),
+                    traceback.format_exc()))
 
     def _unthrottle_format_urls(self, video_id, player_url, formats):
         for fmt in formats:

From 78ce962f4fe020994c216dd2671546fbe58a5c67 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 30 Jan 2022 01:24:09 +0530
Subject: [PATCH 022/312] [youtube] Support channel search

Code from https://github.com/yt-dlp/yt-dlp/commit/cd684175adbe663bbdf6a6c72d8b99b617b6ff2e
---
 youtube_dl/extractor/youtube.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4165de15c..8e1254f19 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2438,6 +2438,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
         'only_matching': True,
+    }, {
+        'note': 'Search tab',
+        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
+        'playlist_mincount': 40,
+        'info_dict': {
+            'id': 'UCYO_jab_esuFRV4b17AJtAw',
+            'title': '3Blue1Brown - Search - linear algebra',
+            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+            'uploader': '3Blue1Brown',
+            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
+        }
     }]
 
     @classmethod
@@ -2835,8 +2846,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     @staticmethod
     def _extract_selected_tab(tabs):
         for tab in tabs:
-            if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
-                return tab['tabRenderer']
+            renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
+            if renderer.get('selected') is True:
+                return renderer
         else:
             raise ExtractorError('Unable to find selected tab')
 
@@ -2893,6 +2905,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             title = channel_title or item_id
             if tab_title:
                 title += ' - %s' % tab_title
+            if selected_tab.get('expandedText'):
+                title += ' - %s' % selected_tab['expandedText']
             description = renderer.get('description')
             playlist_id = renderer.get('externalId')
         else:

From 7a497f1405ecdcd76c671c7bfaad238d75d01639 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Feb 2022 04:09:23 +0000
Subject: [PATCH 023/312] Rework 2c2c2bd with an actual Mix page and realistic
 playlist size

From https://github.com/ytdl-org/youtube-dl/commit/2c2c2bd348b7dce0aad55a6fc37a18c6f9a000e3#commitcomment-65953545
---
 test/test_youtube_lists.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 07a6b6d06..e0e8891ba 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -36,12 +36,12 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         dl.params['format'] = 'best'
         ie = YoutubeTabIE(dl)
-        result = dl.extract_info('https://www.youtube.com/watch?v=uVJ0Il5WvbE&list=PLhQjrBD2T381k8ul4WQ8SQ165XqY149WW',
+        result = dl.extract_info('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8',
                                  download=False, ie_key=ie.ie_key(), process=True)
         entries = (result or {}).get('entries', [{'id': 'not_found', }])
-        self.assertTrue(len(entries) >= 50)
+        self.assertTrue(len(entries) >= 25)
         original_video = entries[0]
-        self.assertEqual(original_video['id'], 'uVJ0Il5WvbE')
+        self.assertEqual(original_video['id'], 'tyITL_exICo')
 
     def test_youtube_flat_playlist_extraction(self):
         dl = FakeYDL()

From 0c0876f790c78c38ececbc920073e8b6cf01e9c7 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 3 Feb 2022 07:44:37 +0530
Subject: [PATCH 024/312] [youtube:search] Add tests

---
 youtube_dl/extractor/youtube.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3ab60960a..41695a561 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3206,7 +3206,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     _SEARCH_KEY = 'ytsearch'
     _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
     _MAX_RESULTS = float('inf')
-    _TESTS = []
+    _TESTS = [{
+        'url': 'ytsearch10:youtube-dl test video',
+        'playlist_count': 10,
+        'info_dict': {
+            'id': 'youtube-dl test video',
+            'title': 'youtube-dl test video',
+        }
+    }]
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
@@ -3219,7 +3226,14 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube.com searches, newest videos first'
     _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
-    _TESTS = []
+    _TESTS = [{
+        'url': 'ytsearchdate10:youtube-dl test video',
+        'playlist_count': 10,
+        'info_dict': {
+            'id': 'youtube-dl test video',
+            'title': 'youtube-dl test video',
+        }
+    }]
 
 
 class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
@@ -3232,7 +3246,8 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
         'info_dict': {
             'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
-        }
+        },
+        'params': {'playlistend': 5}
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,

From 61d791726f67255c2ed3c0bb6ee24c8c1faeb028 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Feb 2022 11:24:03 +0000
Subject: [PATCH 025/312] Find TV2DK Kaltura ID in Nuxt.js page format

---
 youtube_dl/extractor/tv2dk.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py
index 8bd5fd640..106a081e1 100644
--- a/youtube_dl/extractor/tv2dk.py
+++ b/youtube_dl/extractor/tv2dk.py
@@ -41,8 +41,16 @@ class TV2DKIE(InfoExtractor):
             'duration': 1347,
             'view_count': int,
         },
-        'params': {
-            'skip_download': True,
+        'add_ie': ['Kaltura'],
+    }, {
+        'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
+        'info_dict': {
+            'id': '1_7iwll9n0',
+            'ext': 'mp4',
+            'upload_date': '20211027',
+            'title': 'Gadekamp #6 - Højhuse i København',
+            'uploader_id': 'tv2lorry',
+            'timestamp': 1635345229,
         },
         'add_ie': ['Kaltura'],
     }, {
@@ -91,7 +99,8 @@ class TV2DKIE(InfoExtractor):
             add_entry(partner_id, kaltura_id)
         if not entries:
             kaltura_id = self._search_regex(
-                r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
+                (r'entry_id\s*:\s*["\']([0-9a-z_]+)',
+                 r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
             partner_id = self._search_regex(
                 (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
                 'partner id')

From 27dbf6f0ab778a9e3d81be64a615046e6737c3f6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Feb 2022 11:38:44 +0000
Subject: [PATCH 026/312] Return the item itself if playlist has one entry

Removes playlist spam from log
---
 youtube_dl/extractor/tv2dk.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py
index 106a081e1..ec5cbdf03 100644
--- a/youtube_dl/extractor/tv2dk.py
+++ b/youtube_dl/extractor/tv2dk.py
@@ -105,6 +105,8 @@ class TV2DKIE(InfoExtractor):
                 (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
                 'partner id')
             add_entry(partner_id, kaltura_id)
+        if len(entries) == 1:
+            return entries[0]
         return self.playlist_result(entries)
 
 

From 8248133e5ee5579316120cbcbff3ba8b713f1017 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Feb 2022 11:29:41 +0000
Subject: [PATCH 027/312] Back-port yt-dlp Viki extractor

From https://github.com/yt-dlp/yt-dlp/pull/2540
---
 youtube_dl/extractor/viki.py | 335 +++++++++++++++--------------------
 1 file changed, 144 insertions(+), 191 deletions(-)

diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 2e9cbf148..2ddca0ca6 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,38 +1,29 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import base64
 import hashlib
 import hmac
-import itertools
 import json
-import re
 import time
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
-)
 from ..utils import (
     ExtractorError,
     int_or_none,
     parse_age_limit,
     parse_iso8601,
-    sanitized_Request,
-    std_headers,
     try_get,
 )
 
 
 class VikiBaseIE(InfoExtractor):
     _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
-    _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
-    _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
+    _API_URL_TEMPLATE = 'https://api.viki.io%s'
 
+    _DEVICE_ID = '112395910d'
     _APP = '100005a'
-    _APP_VERSION = '6.0.0'
-    _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
+    _APP_VERSION = '6.11.3'
+    _APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
 
     _GEO_BYPASS = False
     _NETRC_MACHINE = 'viki'
@@ -45,43 +36,60 @@ class VikiBaseIE(InfoExtractor):
         'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
     }
 
-    def _prepare_call(self, path, timestamp=None, post_data=None):
+    def _stream_headers(self, timestamp, sig):
+        return {
+            'X-Viki-manufacturer': 'vivo',
+            'X-Viki-device-model': 'vivo 1606',
+            'X-Viki-device-os-ver': '6.0.1',
+            'X-Viki-connection-type': 'WIFI',
+            'X-Viki-carrier': '',
+            'X-Viki-as-id': '100005a-1625321982-3932',
+            'timestamp': str(timestamp),
+            'signature': str(sig),
+            'x-viki-app-ver': self._APP_VERSION
+        }
+
+    def _api_query(self, path, version=4, **kwargs):
         path += '?' if '?' not in path else '&'
-        if not timestamp:
-            timestamp = int(time.time())
-        query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+        app = self._APP
+        query = '/v{version}/{path}app={app}'.format(**locals())
         if self._token:
             query += '&token=%s' % self._token
+        return query + ''.join('&{name}={val}.format(**locals())' for name, val in kwargs.items())
+
+    def _sign_query(self, path):
+        timestamp = int(time.time())
+        query = self._api_query(path, version=5)
         sig = hmac.new(
             self._APP_SECRET.encode('ascii'),
-            query.encode('ascii'),
-            hashlib.sha1
-        ).hexdigest()
-        url = self._API_URL_TEMPLATE % (query, sig)
-        return sanitized_Request(
-            url, json.dumps(post_data).encode('utf-8')) if post_data else url
+            '{query}&t={timestamp}'.format(**locals()).encode('ascii'),
+            hashlib.sha1).hexdigest()
+        return timestamp, sig, self._API_URL_TEMPLATE % query
 
-    def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
+    def _call_api(
+            self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
+        if query is None:
+            timestamp, sig, url = self._sign_query(path)
+        else:
+            url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
         resp = self._download_json(
-            self._prepare_call(path, timestamp, post_data), video_id, note,
-            headers={'x-viki-app-ver': self._APP_VERSION})
-
-        error = resp.get('error')
-        if error:
-            if error == 'invalid timestamp':
-                resp = self._download_json(
-                    self._prepare_call(path, int(resp['current_timestamp']), post_data),
-                    video_id, '%s (retry)' % note)
-                error = resp.get('error')
-            if error:
-                self._raise_error(resp['error'])
+            url, video_id, note, fatal=fatal, query=query,
+            data=json.dumps(data).encode('utf-8') if data else None,
+            headers=({'x-viki-app-ver': self._APP_VERSION} if data
+                     else self._stream_headers(timestamp, sig) if query is None
+                     else None), expected_status=400) or {}
 
+        self._raise_error(resp.get('error'), fatal)
         return resp
 
-    def _raise_error(self, error):
-        raise ExtractorError(
-            '%s returned error: %s' % (self.IE_NAME, error),
-            expected=True)
+    def _raise_error(self, error, fatal=True):
+        if error is None:
+            return
+        msg = '%s said: %s' % (self.IE_NAME, error)
+        if fatal:
+            raise ExtractorError(msg, expected=True)
+        else:
+            self.report_warning(msg)
 
     def _check_errors(self, data):
         for reason, status in (data.get('blocking') or {}).items():
@@ -90,9 +98,10 @@ class VikiBaseIE(InfoExtractor):
                 if reason == 'geo':
                     self.raise_geo_restricted(msg=message)
                 elif reason == 'paywall':
+                    if try_get(data, lambda x: x['paywallable']['tvod']):
+                        self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
                     self.raise_login_required(message)
-                raise ExtractorError('%s said: %s' % (
-                    self.IE_NAME, message), expected=True)
+                self._raise_error(message)
 
     def _real_initialize(self):
         self._login()
@@ -102,35 +111,39 @@ class VikiBaseIE(InfoExtractor):
         if username is None:
             return
 
-        login_form = {
-            'login_id': username,
-            'password': password,
-        }
-
-        login = self._call_api(
-            'sessions.json', None,
-            'Logging in', post_data=login_form)
-
-        self._token = login.get('token')
+        self._token = self._call_api(
+            'sessions.json', None, 'Logging in', fatal=False,
+            data={'username': username, 'password': password}).get('token')
         if not self._token:
-            self.report_warning('Unable to get session token, login has probably failed')
+            self.report_warning('Login Failed: Unable to get session token')
 
     @staticmethod
-    def dict_selection(dict_obj, preferred_key, allow_fallback=True):
+    def dict_selection(dict_obj, preferred_key):
         if preferred_key in dict_obj:
-            return dict_obj.get(preferred_key)
-
-        if not allow_fallback:
-            return
-
-        filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
-        return filtered_dict[0] if filtered_dict else None
+            return dict_obj[preferred_key]
+        return (list(filter(None, dict_obj.values())) or [None])[0]
 
 
 class VikiIE(VikiBaseIE):
     IE_NAME = 'viki'
     _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
     _TESTS = [{
+        'note': 'Free non-DRM video with storyboards in MPD',
+        'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
+        'info_dict': {
+            'id': '1175236v',
+            'ext': 'mp4',
+            'title': 'Choosing Spouse by Lottery - Episode 1',
+            'timestamp': 1606463239,
+            'age_limit': 12,
+            'uploader': 'FCC',
+            'upload_date': '20201127',
+        },
+        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
+        'params': {
+            'format': 'bestvideo',
+        },
+    }, {
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
         'info_dict': {
             'id': '1023585v',
@@ -146,7 +159,7 @@ class VikiIE(VikiBaseIE):
         'params': {
             'format': 'bestvideo',
         },
-        'skip': 'Blocked in the US',
+        'skip': 'Content is only available to Viki Pass Plus subscribers',
         'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
     }, {
         # clip
@@ -178,11 +191,11 @@ class VikiIE(VikiBaseIE):
             'like_count': int,
             'age_limit': 13,
         },
-        'skip': 'Blocked in the US',
+        'skip': 'Page not found!',
     }, {
         # episode
         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '0a53dc252e6e690feccd756861495a8c',
+        'md5': '670440c79f7109ca6564d4c7f24e3e81',
         'info_dict': {
             'id': '44699v',
             'ext': 'mp4',
@@ -193,7 +206,7 @@ class VikiIE(VikiBaseIE):
             'upload_date': '20100405',
             'uploader': 'group8',
             'like_count': int,
-            'age_limit': 13,
+            'age_limit': 15,
             'episode_number': 1,
         },
         'params': {
@@ -224,7 +237,7 @@ class VikiIE(VikiBaseIE):
     }, {
         # non-English description
         'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': '41faaba0de90483fb4848952af7c7d0d',
+        'md5': '78bf49fdaa51f9e7f9150262a9ef9bdf',
         'info_dict': {
             'id': '158036v',
             'ext': 'mp4',
@@ -232,8 +245,8 @@ class VikiIE(VikiBaseIE):
             'upload_date': '20111122',
             'timestamp': 1321985454,
             'description': 'md5:44b1e46619df3a072294645c770cef36',
-            'title': 'Love In Magic',
-            'age_limit': 13,
+            'title': 'Love in Magic',
+            'age_limit': 15,
         },
         'params': {
             'format': 'bestvideo',
@@ -244,45 +257,53 @@ class VikiIE(VikiBaseIE):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        resp = self._download_json(
-            'https://www.viki.com/api/videos/' + video_id,
-            video_id, 'Downloading video JSON', headers={
-                'x-client-user-agent': std_headers['User-Agent'],
-                'x-viki-app-ver': '3.0.0',
-            })
-        video = resp['video']
+        video = self._call_api('videos/{0}.json'.format(video_id), video_id, 'Downloading video JSON', query={})
 
         self._check_errors(video)
 
-        title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
+        title = try_get(video, lambda x: x['titles']['en'], str)
         episode_number = int_or_none(video.get('number'))
         if not title:
             title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
             container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
             container_title = self.dict_selection(container_titles, 'en')
-            title = '%s - %s' % (container_title, title)
+            if container_title and title == video_id:
+                title = container_title
+            else:
+                title = '%s - %s' % (container_title, title)
+
+        resp = self._call_api(
+            'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID),
+            video_id, 'Downloading video streams JSON')['main'][0]
+
+        mpd_url = resp['url']
+        # 720p is hidden in another MPD which can be found in the current manifest content
+        mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
+        mpd_url = self._search_regex(
+            r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
+        if 'mpdhd_high' not in mpd_url:
+            # Modify the URL to get 1080p
+            mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
+        formats = self._extract_mpd_formats(mpd_url, video_id)
+        self._sort_formats(formats)
 
         description = self.dict_selection(video.get('descriptions', {}), 'en')
-
+        thumbnails = [{
+            'id': thumbnail_id,
+            'url': thumbnail['url'],
+        } for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
         like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
 
-        thumbnails = []
-        for thumbnail_id, thumbnail in (video.get('images') or {}).items():
-            thumbnails.append({
-                'id': thumbnail_id,
-                'url': thumbnail.get('url'),
-            })
+        stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
+        subtitles = dict((lang, [{
+            'ext': ext,
+            'url': self._API_URL_TEMPLATE % self._api_query(
+                'videos/{0}/auth_subtitles/{1}.{2}'.format(video_id, lang, ext), stream_id=stream_id)
+        } for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
 
-        subtitles = {}
-        for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
-            subtitles[subtitle_lang] = [{
-                'ext': subtitles_format,
-                'url': self._prepare_call(
-                    'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
-            } for subtitles_format in ('srt', 'vtt')]
-
-        result = {
+        return {
             'id': video_id,
+            'formats': formats,
             'title': title,
             'description': description,
             'duration': int_or_none(video.get('duration')),
@@ -296,79 +317,6 @@ class VikiIE(VikiBaseIE):
             'episode_number': episode_number,
         }
 
-        formats = []
-
-        def add_format(format_id, format_dict, protocol='http'):
-            # rtmps URLs does not seem to work
-            if protocol == 'rtmps':
-                return
-            format_url = format_dict.get('url')
-            if not format_url:
-                return
-            qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
-            stream = qs.get('stream', [None])[0]
-            if stream:
-                format_url = base64.b64decode(stream).decode()
-            if format_id in ('m3u8', 'hls'):
-                m3u8_formats = self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4',
-                    entry_protocol='m3u8_native',
-                    m3u8_id='m3u8-%s' % protocol, fatal=False)
-                # Despite CODECS metadata in m3u8 all video-only formats
-                # are actually video+audio
-                for f in m3u8_formats:
-                    if '_drm/index_' in f['url']:
-                        continue
-                    if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
-                        f['acodec'] = None
-                    formats.append(f)
-            elif format_id in ('mpd', 'dash'):
-                formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, 'mpd-%s' % protocol, fatal=False))
-            elif format_url.startswith('rtmp'):
-                mobj = re.search(
-                    r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
-                    format_url)
-                if not mobj:
-                    return
-                formats.append({
-                    'format_id': 'rtmp-%s' % format_id,
-                    'ext': 'flv',
-                    'url': mobj.group('url'),
-                    'play_path': mobj.group('playpath'),
-                    'app': mobj.group('app'),
-                    'page_url': url,
-                })
-            else:
-                formats.append({
-                    'url': format_url,
-                    'format_id': '%s-%s' % (format_id, protocol),
-                    'height': int_or_none(self._search_regex(
-                        r'^(\d+)[pP]$', format_id, 'height', default=None)),
-                })
-
-        for format_id, format_dict in (resp.get('streams') or {}).items():
-            add_format(format_id, format_dict)
-        if not formats:
-            streams = self._call_api(
-                'videos/%s/streams.json' % video_id, video_id,
-                'Downloading video streams JSON')
-
-            if 'external' in streams:
-                result.update({
-                    '_type': 'url_transparent',
-                    'url': streams['external']['url'],
-                })
-                return result
-
-            for format_id, stream_dict in streams.items():
-                for protocol, format_dict in stream_dict.items():
-                    add_format(format_id, format_dict, protocol)
-        self._sort_formats(formats)
-
-        result['formats'] = formats
-        return result
-
 
 class VikiChannelIE(VikiBaseIE):
     IE_NAME = 'viki:channel'
@@ -378,9 +326,9 @@ class VikiChannelIE(VikiBaseIE):
         'info_dict': {
             'id': '50c',
             'title': 'Boys Over Flowers',
-            'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
+            'description': 'md5:f08b679c200e1a273c695fe9986f21d7',
         },
-        'playlist_mincount': 71,
+        'playlist_mincount': 51,
     }, {
         'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
         'info_dict': {
@@ -401,33 +349,38 @@ class VikiChannelIE(VikiBaseIE):
         'only_matching': True,
     }]
 
-    _PER_PAGE = 25
+    _video_types = ('episodes', 'movies', 'clips', 'trailers')
+
+    def _entries(self, channel_id):
+        params = {
+            'app': self._APP, 'token': self._token, 'only_ids': 'true',
+            'direction': 'asc', 'sort': 'number', 'per_page': 30
+        }
+        video_types = self._video_types
+        for video_type in video_types:
+            if video_type not in self._video_types:
+                self.report_warning('Unknown video_type: ' + video_type)
+            page_num = 0
+            while True:
+                page_num += 1
+                params['page'] = page_num
+                res = self._call_api(
+                    'containers/{channel_id}/{video_type}.json'.format(**locals()), channel_id, query=params, fatal=False,
+                    note='Downloading %s JSON page %d' % (video_type.title(), page_num))
+
+                for video_id in res.get('response') or []:
+                    yield self.url_result('https://www.viki.com/videos/' + video_id, VikiIE.ie_key(), video_id)
+                if not res.get('more'):
+                    break
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
-        channel = self._call_api(
-            'containers/%s.json' % channel_id, channel_id,
-            'Downloading channel JSON')
+        channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
 
         self._check_errors(channel)
 
-        title = self.dict_selection(channel['titles'], 'en')
-
-        description = self.dict_selection(channel['descriptions'], 'en')
-
-        entries = []
-        for video_type in ('episodes', 'clips', 'movies'):
-            for page_num in itertools.count(1):
-                page = self._call_api(
-                    'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
-                    % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
-                    'Downloading %s JSON page #%d' % (video_type, page_num))
-                for video in page['response']:
-                    video_id = video['id']
-                    entries.append(self.url_result(
-                        'https://www.viki.com/videos/%s' % video_id, 'Viki'))
-                if not page['pagination']['next']:
-                    break
-
-        return self.playlist_result(entries, channel_id, title, description)
+        return self.playlist_result(
+            self._entries(channel_id), channel_id,
+            self.dict_selection(channel['titles'], 'en'),
+            self.dict_selection(channel['descriptions'], 'en'))

From b494824286f0ac2fc7313452b287fbbffe61ccbe Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jan 2022 13:11:11 +0000
Subject: [PATCH 028/312] Support Tele5 pages with Discovery Networks format
 instead of JWPlatform

---
 youtube_dl/extractor/tele5.py | 86 ++++++++++++++---------------------
 1 file changed, 35 insertions(+), 51 deletions(-)

diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py
index 3e1a7a9e6..df02dfc47 100644
--- a/youtube_dl/extractor/tele5.py
+++ b/youtube_dl/extractor/tele5.py
@@ -1,19 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from .nexx import NexxIE
 from ..compat import compat_urlparse
 from ..utils import (
-    NO_DEFAULT,
-    smuggle_url,
+    ExtractorError,
+    extract_attributes,
 )
 
+from .dplay import DPlayIE
 
-class Tele5IE(InfoExtractor):
+
+class Tele5IE(DPlayIE):
     _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _GEO_COUNTRIES = ['DE']
     _TESTS = [{
@@ -28,6 +25,7 @@ class Tele5IE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available: "404 Seite nicht gefunden"',
     }, {
         # jwplatform, nexx unavailable
         'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
@@ -42,7 +40,20 @@ class Tele5IE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-        'add_ie': [JWPlatformIE.ie_key()],
+        'skip': 'No longer available, redirects to Filme page',
+    }, {
+        'url': 'https://tele5.de/mediathek/angel-of-mine/',
+        'info_dict': {
+            'id': '1252360',
+            'ext': 'mp4',
+            'upload_date': '20220109',
+            'timestamp': 1641762000,
+            'title': 'Angel of Mine',
+            'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
     }, {
         'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
         'only_matching': True,
@@ -64,45 +75,18 @@ class Tele5IE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
-
-        NEXX_ID_RE = r'\d{6,}'
-        JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
-
-        def nexx_result(nexx_id):
-            return self.url_result(
-                'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
-                ie=NexxIE.ie_key(), video_id=nexx_id)
-
-        nexx_id = jwplatform_id = None
-
-        if video_id:
-            if re.match(NEXX_ID_RE, video_id):
-                return nexx_result(video_id)
-            elif re.match(JWPLATFORM_ID_RE, video_id):
-                jwplatform_id = video_id
-
-        if not nexx_id:
-            display_id = self._match_id(url)
-            webpage = self._download_webpage(url, display_id)
-
-            def extract_id(pattern, name, default=NO_DEFAULT):
-                return self._html_search_regex(
-                    (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
-                     r'\s+id\s*=\s*["\']player_(%s)' % pattern,
-                     r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
-                    default=default)
-
-            nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
-            if nexx_id:
-                return nexx_result(nexx_id)
-
-            if not jwplatform_id:
-                jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
-
-        return self.url_result(
-            smuggle_url(
-                'jwplatform:%s' % jwplatform_id,
-                {'geo_countries': self._GEO_COUNTRIES}),
-            ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
+        player_info = extract_attributes(player_element)
+        asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
+        endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
+        source_type = player_info.get('sourcetype')
+        if source_type:
+            endpoint = '%s-%s' % (source_type, endpoint)
+        try:
+            return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
+        except ExtractorError as e:
+            if getattr(e, 'message', '') == 'Missing deviceId in context':
+                raise ExtractorError('DRM protected', cause=e, expected=True)
+            raise

From 4186e817772d49d6f66b07c5ac8c248f026a6446 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jan 2022 03:13:37 +0000
Subject: [PATCH 029/312] NDR: improve extraction of NDR id, description, etc
 with current page formats

---
 youtube_dl/extractor/ndr.py | 45 +++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index ddd828d92..a0d553f00 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     determine_ext,
+    ExtractorError,
     int_or_none,
     merge_dicts,
     parse_iso8601,
@@ -20,13 +22,13 @@ class NDRBaseIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         display_id = next(group for group in mobj.groups() if group)
         webpage = self._download_webpage(url, display_id)
-        return self._extract_embed(webpage, display_id)
+        return self._extract_embed(webpage, display_id, url)
 
 
 class NDRIE(NDRBaseIE):
     IE_NAME = 'ndr'
     IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
-    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+    _VALID_URL = r'https?://(?:\w+\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
     _TESTS = [{
         # httpVideo, same content id
         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
@@ -109,19 +111,38 @@ class NDRIE(NDRBaseIE):
         'only_matching': True,
     }]
 
-    def _extract_embed(self, webpage, display_id):
-        embed_url = self._html_search_meta(
-            'embedURL', webpage, 'embed URL',
-            default=None) or self._search_regex(
-            r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'embed URL', group='url')
+    def _extract_embed(self, webpage, display_id, url):
+        embed_url = (
+            self._html_search_meta(
+                'embedURL', webpage, 'embed URL',
+                default=None)
+            or self._search_regex(
+                r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'embed URL', group='url', default=None)
+            or self._search_regex(
+                r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+                'embed URL', group='url', default=''))
+        # some more work needed if we only found sophoraID
+        if re.match(r'^[a-z]+\d+$', embed_url):
+            # get the initial part of the url path,. eg /panorama/archiv/2022/
+            parsed_url = compat_urllib_parse_urlparse(url)
+            path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+            # find tell-tale image with the actual ID
+            ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+            # or try to use special knowledge!
+            NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
+            embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
+        if not embed_url:
+            raise ExtractorError('Unable to extract embedUrl')
+
         description = self._search_regex(
             r'<p[^>]+itemprop="description">([^<]+)</p>',
             webpage, 'description', default=None) or self._og_search_description(webpage)
         timestamp = parse_iso8601(
             self._search_regex(
-                r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
-                webpage, 'upload date', default=None))
+                (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
+                 r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
+                webpage, 'upload date', group='cont', default=None))
         info = self._search_json_ld(webpage, display_id, default={})
         return merge_dicts({
             '_type': 'url_transparent',
@@ -179,7 +200,7 @@ class NJoyIE(NDRBaseIE):
         video_id = self._search_regex(
             r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
         description = self._search_regex(
-            r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+                r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
             webpage, 'description', fatal=False)
         return {
             '_type': 'url_transparent',
@@ -291,7 +312,7 @@ class NDREmbedBaseIE(InfoExtractor):
 
 class NDREmbedIE(NDREmbedBaseIE):
     IE_NAME = 'ndr:embed'
-    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
     _TESTS = [{
         'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
         'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',

From f0a05a55c2ee512880546c056cfbec5ad3399798 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jan 2022 03:22:32 +0000
Subject: [PATCH 030/312] NJoy: improve extraction of NDR id, description, etc
 with current page formats

---
 youtube_dl/extractor/ndr.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index a0d553f00..0a723e3b0 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -196,18 +196,25 @@ class NJoyIE(NDRBaseIE):
         'only_matching': True,
     }]
 
-    def _extract_embed(self, webpage, display_id):
+    def _extract_embed(self, webpage, display_id, url=None):
+        # find tell-tale URL with the actual ID, or ...
         video_id = self._search_regex(
-            r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
-        description = self._search_regex(
+            (r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+             r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+            webpage, 'NDR id', default=None)
+
+        description = (
+            self._html_search_meta('description', webpage)
+            or self._search_regex(
                 r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
-            webpage, 'description', fatal=False)
+                webpage, 'description', fatal=False))
         return {
             '_type': 'url_transparent',
             'ie_key': 'NDREmbedBase',
             'url': 'ndr:%s' % video_id,
             'display_id': display_id,
             'description': description,
+            'title': display_id.replace('-', ' ').strip(),
         }
 
 

From 39a98b09a2acf50dc64bc41185be723b98e740b9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jan 2022 03:29:43 +0000
Subject: [PATCH 031/312] Fix NDR, NJoy tests

---
 youtube_dl/extractor/ndr.py | 41 ++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 0a723e3b0..1996d4f96 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -40,13 +40,14 @@ class NDRIE(NDRBaseIE):
             'title': 'Party, Pötte und Parade',
             'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
             'uploader': 'ndrtv',
-            'timestamp': 1431108900,
+            'timestamp': 1431255671,
             'upload_date': '20150510',
             'duration': 3498,
         },
         'params': {
             'skip_download': True,
         },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
         # httpVideo, different content id
         'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
@@ -65,6 +66,7 @@ class NDRIE(NDRBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available',
     }, {
         # httpAudio, same content id
         'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
@@ -76,8 +78,8 @@ class NDRIE(NDRBaseIE):
             'title': 'La Valette entgeht der Hinrichtung',
             'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
             'uploader': 'ndrinfo',
-            'timestamp': 1290626100,
-            'upload_date': '20140729',
+            'timestamp': 1631711863,
+            'upload_date': '20210915',
             'duration': 884,
         },
         'params': {
@@ -91,9 +93,10 @@ class NDRIE(NDRBaseIE):
             'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
             'ext': 'mp4',
             'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
-            'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
+            'description': 'md5:700f6de264010585012a72f97b0ac0c9',
             'uploader': 'ndrtv',
-            'upload_date': '20201113',
+            'upload_date': '20201207',
+            'timestamp': 1614349457,
             'duration': 1749,
             'subtitles': {
                 'de': [{
@@ -174,19 +177,19 @@ class NJoyIE(NDRBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available',
     }, {
         # httpVideo, different content id
         'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
         'md5': '417660fffa90e6df2fda19f1b40a64d8',
         'info_dict': {
-            'id': 'dockville882',
+            'id': 'livestream283',
             'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
-            'ext': 'mp4',
-            'title': '"Ich hab noch nie" mit Felix Jaehn',
-            'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+            'ext': 'mp3',
+            'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
+            'description': 'md5:681698f527b8601e511e7b79edde7d2c',
             'uploader': 'njoy',
-            'upload_date': '20150822',
-            'duration': 211,
+            'upload_date': '20210830',
         },
         'params': {
             'skip_download': True,
@@ -332,6 +335,7 @@ class NDREmbedIE(NDREmbedBaseIE):
             'upload_date': '20150907',
             'duration': 132,
         },
+        'skip': 'No longer available',
     }, {
         'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
         'md5': '002085c44bae38802d94ae5802a36e78',
@@ -347,6 +351,7 @@ class NDREmbedIE(NDREmbedBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available',
     }, {
         'url': 'http://www.ndr.de/info/audio51535-player.html',
         'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@@ -356,7 +361,7 @@ class NDREmbedIE(NDREmbedBaseIE):
             'title': 'La Valette entgeht der Hinrichtung',
             'is_live': False,
             'uploader': 'ndrinfo',
-            'upload_date': '20140729',
+            'upload_date': '20210915',
             'duration': 884,
         },
         'params': {
@@ -377,15 +382,17 @@ class NDREmbedIE(NDREmbedBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available',
     }, {
         # httpVideoLive
         'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
         'info_dict': {
             'id': 'livestream217',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
             'is_live': True,
-            'upload_date': '20150910',
+            'upload_date': '20210409',
+            'uploader': 'ndrtv',
         },
         'params': {
             'skip_download': True,
@@ -423,9 +430,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
             'ext': 'mp4',
             'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
             'is_live': False,
-            'upload_date': '20150807',
+            'upload_date': '20200826',
             'duration': 1011,
         },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
         # httpAudio
         'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@@ -442,6 +450,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
         'params': {
             'skip_download': True,
         },
+        'skip': 'No longer available',
     }, {
         # httpAudioLive, no explicit ext
         'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@@ -451,7 +460,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
             'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
             'is_live': True,
             'uploader': 'njoy',
-            'upload_date': '20150810',
+            'upload_date': '20210830',
         },
         'params': {
             'skip_download': True,

From 01824d275bfa7efbaca274b38c1ddc2b03f12f5d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 19 Jan 2022 13:24:33 +0000
Subject: [PATCH 032/312] Additional tweaks: allow any .ndr.de, simplify quote
 match

---
 youtube_dl/extractor/ndr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 1996d4f96..26627f8b0 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -28,7 +28,7 @@ class NDRBaseIE(InfoExtractor):
 class NDRIE(NDRBaseIE):
     IE_NAME = 'ndr'
     IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
-    _VALID_URL = r'https?://(?:\w+\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
     _TESTS = [{
         # httpVideo, same content id
         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
@@ -202,7 +202,7 @@ class NJoyIE(NDRBaseIE):
     def _extract_embed(self, webpage, display_id, url=None):
         # find tell-tale URL with the actual ID, or ...
         video_id = self._search_regex(
-            (r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+            (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
              r'<iframe[^>]+id="pp_([\da-z]+)"', ),
             webpage, 'NDR id', default=None)
 
@@ -322,7 +322,7 @@ class NDREmbedBaseIE(InfoExtractor):
 
 class NDREmbedIE(NDREmbedBaseIE):
     IE_NAME = 'ndr:embed'
-    _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
+    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
     _TESTS = [{
         'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
         'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',

From 5197336de6ee2d18c37732f3f7c6532c8899ec29 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 14 Jan 2022 20:14:14 +0000
Subject: [PATCH 033/312] Support more deeply nested ptmd_path with test,
 update tests

---
 youtube_dl/extractor/zdf.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 4dd56f66d..3d39bb33a 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     determine_ext,
+    ExtractorError,
     float_or_none,
     int_or_none,
     merge_dicts,
@@ -145,6 +146,7 @@ class ZDFIE(ZDFBaseIE):
             'timestamp': 1613948400,
             'upload_date': '20210221',
         },
+        'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
     }, {
         # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
         'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
@@ -158,6 +160,7 @@ class ZDFIE(ZDFBaseIE):
             'timestamp': 1608604200,
             'upload_date': '20201222',
         },
+        'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
     }, {
         'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
         'info_dict': {
@@ -190,6 +193,17 @@ class ZDFIE(ZDFBaseIE):
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
+        'info_dict': {
+            'id': 'video_artede_083871-001-A',
+            'ext': 'mp4',
+            'title': 'Tödliche Flucht (1/6)',
+            'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
+            'duration': 3193.0,
+            'timestamp': 1641355200,
+            'upload_date': '20220105',
+        },
     }]
 
     def _extract_entry(self, url, player, content, video_id):
@@ -197,12 +211,18 @@ class ZDFIE(ZDFBaseIE):
 
         t = content['mainVideoContent']['http://zdf.de/rels/target']
 
-        ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
+        def get_ptmd_path(d):
+            return (
+                d.get('http://zdf.de/rels/streams/ptmd')
+                or d.get('http://zdf.de/rels/streams/ptmd-template',
+                         '').replace('{playerId}', 'ngplayer_2_4'))
+
+        ptmd_path = get_ptmd_path(try_get(t, lambda x: x['streams']['default'], dict) or {})
+        if not ptmd_path:
+            ptmd_path = get_ptmd_path(t)
 
         if not ptmd_path:
-            ptmd_path = t[
-                'http://zdf.de/rels/streams/ptmd-template'].replace(
-                '{playerId}', 'ngplayer_2_4')
+            raise ExtractorError('Could not extract ptmd_path')
 
         info = self._extract_ptmd(
             urljoin(url, ptmd_path), video_id, player['apiToken'], url)

From 5cb4833f408745135d1b0e178b9a2545a899f2ac Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Jan 2022 19:38:08 +0000
Subject: [PATCH 034/312] Update URPlayIE extractor for Next.js page format,
 with subtitles

---
 youtube_dl/extractor/urplay.py | 52 ++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py
index d6c79147e..abd2bee84 100644
--- a/youtube_dl/extractor/urplay.py
+++ b/youtube_dl/extractor/urplay.py
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     dict_get,
+    ExtractorError,
     int_or_none,
+    ISO639Utils,
+    parse_age_limit,
+    try_get,
     unified_timestamp,
 )
 
@@ -23,9 +27,10 @@ class URPlayIE(InfoExtractor):
             'upload_date': '20171214',
             'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
             'duration': 2269,
-            'categories': ['Kultur & historia'],
+            'categories': ['Vetenskap & teknik'],
             'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
             'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
+            'age_limit': 15,
         },
     }, {
         'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
@@ -50,11 +55,19 @@ class URPlayIE(InfoExtractor):
         video_id = self._match_id(url)
         url = url.replace('skola.se/Produkter', 'play.se/program')
         webpage = self._download_webpage(url, video_id)
-        vid = int(video_id)
-        accessible_episodes = self._parse_json(self._html_search_regex(
-            r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
-            webpage, 'urplayer data'), video_id)['accessibleEpisodes']
-        urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
+        urplayer_data = self._search_regex(
+            r'(?s)\bid\s*=\s*"__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script',
+            webpage, 'urplayer next data', fatal=False) or {}
+        if urplayer_data:
+            urplayer_data = self._parse_json(urplayer_data, video_id, fatal=False)
+            urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
+            if not urplayer_data:
+                raise ExtractorError('Unable to parse __NEXT_DATA__')
+        else:
+            accessible_episodes = self._parse_json(self._html_search_regex(
+                r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
+                webpage, 'urplayer data'), video_id)['accessibleEpisodes']
+            urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
         episode = urplayer_data['title']
         raw_streaming_info = urplayer_data['streamingInfo']['raw']
         host = self._download_json(
@@ -72,6 +85,30 @@ class URPlayIE(InfoExtractor):
                     video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
         self._sort_formats(formats)
 
+        subtitles = {}
+
+        def parse_lang_code(code):
+            "3-character language code or None (utils candidate)"
+            if code is None:
+                return
+            lang = code.lower()
+            if not ISO639Utils.long2short(lang):
+                lang = ISO639Utils.short2long(lang)
+            return lang or None
+
+        for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
+            if (k in ('sd', 'hd') or not isinstance(v, dict)):
+                continue
+            lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
+            if not sttl_url:
+                continue
+            lang = parse_lang_code(lang)
+            if not lang:
+                continue
+            sttl = subtitles.get(lang) or []
+            sttl.append({'ext': k, 'url': sttl_url, })
+            subtitles[lang] = sttl
+
         image = urplayer_data.get('image') or {}
         thumbnails = []
         for k, v in image.items():
@@ -104,4 +141,7 @@ class URPlayIE(InfoExtractor):
             'season': series.get('label'),
             'episode': episode,
             'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
+            'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
+                                             for a in urplayer_data.get('ageRanges', []))),
+            'subtitles': subtitles,
         }

From 568c7005d513d0398c20b9e88eb9838c68651fc2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jan 2022 12:59:31 +0000
Subject: [PATCH 035/312] Fix WDRMaus; extend URL matching for other Maus
 pages; improve ID extraction

---
 youtube_dl/extractor/wdr.py | 39 +++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 2903d189e..a5488f3fd 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -10,6 +10,7 @@ from ..compat import (
 )
 from ..utils import (
     determine_ext,
+    dict_get,
     ExtractorError,
     js_to_json,
     strip_jsonp,
@@ -22,9 +23,10 @@ from ..utils import (
 
 
 class WDRIE(InfoExtractor):
-    _VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
+    __API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
+    _VALID_URL = (r'(?:https?:' + __API_URL_TPL) % (r'\d+', r'(?=\d+\.js)|wdr:)(?P<id>\d{6,})')
     _GEO_COUNTRIES = ['DE']
-    _TEST = {
+    _TESTS = [{
         'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
         'info_dict': {
             'id': 'mdb-1557833',
@@ -32,11 +34,20 @@ class WDRIE(InfoExtractor):
             'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
             'upload_date': '20180112',
         },
-    }
+    },
+    ]
+
+    def _asset_url(self, wdr_id):
+        id_len = max(len(wdr_id), 5)
+        return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        if url.startswith('wdr:'):
+            video_id = url[4:]
+            url = self._asset_url(video_id)
+
         metadata = self._download_json(
             url, video_id, transform_source=strip_jsonp)
 
@@ -115,10 +126,10 @@ class WDRIE(InfoExtractor):
         }
 
 
-class WDRPageIE(InfoExtractor):
-    _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
+class WDRPageIE(WDRIE):
+    _MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
     _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
-    _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
+    _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
 
     _TESTS = [
         {
@@ -180,12 +191,12 @@ class WDRPageIE(InfoExtractor):
         {
             'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
             'info_dict': {
-                'id': 'mdb-1552552',
+                'id': 'mdb-2627637',
                 'ext': 'mp4',
                 'upload_date': 're:^[0-9]{8}$',
-                'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
+                'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
             },
-            'skip': 'The id changes from week to week because of the new episode'
+            # 'skip': 'The id changes from week to week because of the new episode'
         },
         {
             'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
@@ -234,7 +245,7 @@ class WDRPageIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
         webpage = self._download_webpage(url, display_id)
 
         entries = []
@@ -260,6 +271,14 @@ class WDRPageIE(InfoExtractor):
             jsonp_url = try_get(
                 media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
             if jsonp_url:
+                # metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
+                clip_id = media_link_obj['mediaObj'].get('ref')
+                if jsonp_url.endswith('.assetjsonp'):
+                    asset = self._download_json(
+                        jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
+                    clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
+                if clip_id:
+                    jsonp_url = self._asset_url(clip_id[4:])
                 entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
 
         # Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)

From 96423449659131ed8e7bfaa7f791466c3f8f2db1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jan 2022 13:04:04 +0000
Subject: [PATCH 036/312] Fix tests for working IEs; disable obsolete WDRMobile

---
 youtube_dl/extractor/wdr.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index a5488f3fd..10db73148 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -170,11 +170,11 @@ class WDRPageIE(WDRIE):
         {
             'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
             'info_dict': {
-                'id': 'mdb-1406149',
+                'id': 'mdb-2296252',
                 'ext': 'mp4',
-                'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                 'alt_title': 'WDR Fernsehen Live',
-                'upload_date': '20150101',
+                'upload_date': '20201112',
                 'is_live': True,
             },
             'params': {
@@ -183,7 +183,7 @@ class WDRPageIE(WDRIE):
         },
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
-            'playlist_mincount': 7,
+            'playlist_mincount': 6,
             'info_dict': {
                 'id': 'aktuelle-stunde-120',
             },
@@ -196,7 +196,7 @@ class WDRPageIE(WDRIE):
                 'upload_date': 're:^[0-9]{8}$',
                 'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
             },
-            # 'skip': 'The id changes from week to week because of the new episode'
+            'skip': 'The id changes from week to week because of the new episode'
         },
         {
             'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
@@ -207,6 +207,7 @@ class WDRPageIE(WDRIE):
                 'upload_date': '20130919',
                 'title': 'Sachgeschichte - Achterbahn ',
             },
+            'skip': 'HTTP Error 404: Not Found',
         },
         {
             'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
@@ -232,6 +233,7 @@ class WDRPageIE(WDRIE):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'HTTP Error 404: Not Found',
         },
         {
             'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
@@ -298,16 +300,14 @@ class WDRPageIE(WDRIE):
 class WDRElefantIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
     _TEST = {
-        'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
+        'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
+        # adaptive stream: unstable file MD5
         'info_dict': {
-            'title': 'Folge Oster-Spezial 2015',
-            'id': 'mdb-1088195',
+            'title': 'Wippe',
+            'id': 'mdb-1198320',
             'ext': 'mp4',
             'age_limit': None,
-            'upload_date': '20150406'
-        },
-        'params': {
-            'skip_download': True,
+            'upload_date': '20071003'
         },
     }
 
@@ -342,6 +342,7 @@ class WDRMobileIE(InfoExtractor):
         /[0-9]+/[0-9]+/
         (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
     IE_NAME = 'wdr:mobile'
+    _WORKING = False  # no such domain
     _TEST = {
         'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
         'info_dict': {

From 23ad6402a6966dd09e4c854f32c33f69be1a064e Mon Sep 17 00:00:00 2001
From: Chris Rose <offline@offby1.net>
Date: Fri, 26 Nov 2021 08:08:17 -0800
Subject: [PATCH 037/312] xvideos: Fix for #30271

---
 youtube_dl/extractor/xvideos.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 8fc64914c..e63d4690d 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -82,7 +82,7 @@ class XVideosIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'https://www.xvideos.com/video%s/' % video_id, video_id)
+            'https://www.xvideos.com/video%s/0' % video_id, video_id)
 
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:

From 005339d6375f2d2a4cec962b1c1a157c1dffbf8f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 8 Dec 2021 23:37:54 +0000
Subject: [PATCH 038/312] [applepodcasts] Support new AMP-ish page structure

---
 youtube_dl/extractor/applepodcasts.py | 43 ++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
index 6a74de758..f0186d4bf 100644
--- a/youtube_dl/extractor/applepodcasts.py
+++ b/youtube_dl/extractor/applepodcasts.py
@@ -3,7 +3,9 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     clean_podcast_url,
+    get_element_by_class,
     int_or_none,
     parse_iso8601,
     try_get,
@@ -14,15 +16,15 @@ class ApplePodcastsIE(InfoExtractor):
     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
-        'md5': 'df02e6acb11c10e844946a39e7222b08',
+        'md5': '41dc31cd650143e530d9423b6b5a344f',
         'info_dict': {
             'id': '1000482637777',
             'ext': 'mp3',
             'title': '207 - Whitney Webb Returns',
-            'description': 'md5:13a73bade02d2e43737751e3987e1399',
+            'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
             'upload_date': '20200705',
-            'timestamp': 1593921600,
-            'duration': 6425,
+            'timestamp': 1593932400,
+            'duration': 6454,
             'series': 'The Tim Dillon Show',
         }
     }, {
@@ -39,17 +41,38 @@ class ApplePodcastsIE(InfoExtractor):
     def _real_extract(self, url):
         episode_id = self._match_id(url)
         webpage = self._download_webpage(url, episode_id)
-        ember_data = self._parse_json(self._search_regex(
-            r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
-            webpage, 'ember data'), episode_id)
-        ember_data = ember_data.get(episode_id) or ember_data
-        episode = ember_data['data']['attributes']
+        episode_data = {}
+        ember_data = {}
+        # new page type 2021-11
+        amp_data = self._parse_json(self._search_regex(
+            r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
+            webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
+        amp_data = try_get(amp_data,
+                           lambda a: self._parse_json(
+                               next(a[x] for x in iter(a) if episode_id in x),
+                               episode_id),
+                           dict) or {}
+        amp_data = amp_data.get('d') or []
+        episode_data = try_get(
+            amp_data,
+            lambda a: next(x for x in a
+                           if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
+            dict)
+        if not episode_data:
+            # try pre 2021-11 page type: TODO: consider deleting if no longer used
+            ember_data = self._parse_json(self._search_regex(
+                r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
+                webpage, 'ember data'), episode_id) or {}
+            ember_data = ember_data.get(episode_id) or ember_data
+            episode_data = try_get(ember_data, lambda x: x['data'], dict)
+        episode = episode_data['attributes']
         description = episode.get('description') or {}
 
         series = None
-        for inc in (ember_data.get('included') or []):
+        for inc in (amp_data or ember_data.get('included') or []):
             if inc.get('type') == 'media/podcast':
                 series = try_get(inc, lambda x: x['attributes']['name'])
+        series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
 
         return {
             'id': episode_id,

From e00b0eab1e78ed822683b2689f60eab85514ac42 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 9 Dec 2021 00:55:04 +0000
Subject: [PATCH 039/312] [applepodcasts] Improve format extraction

Set acodec and vcodec, etc, to avoid breaking, eg, bestaudio
---
 youtube_dl/extractor/applepodcasts.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
index f0186d4bf..dd413a289 100644
--- a/youtube_dl/extractor/applepodcasts.py
+++ b/youtube_dl/extractor/applepodcasts.py
@@ -7,6 +7,7 @@ from ..utils import (
     clean_podcast_url,
     get_element_by_class,
     int_or_none,
+    parse_codecs,
     parse_iso8601,
     try_get,
 )
@@ -74,7 +75,7 @@ class ApplePodcastsIE(InfoExtractor):
                 series = try_get(inc, lambda x: x['attributes']['name'])
         series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
 
-        return {
+        info = [{
             'id': episode_id,
             'title': episode['name'],
             'url': clean_podcast_url(episode['assetUrl']),
@@ -82,4 +83,9 @@ class ApplePodcastsIE(InfoExtractor):
             'timestamp': parse_iso8601(episode.get('releaseDateTime')),
             'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
             'series': series,
-        }
+        }]
+        self._sort_formats(info)
+        info = info[0]
+        codecs = parse_codecs(info.get('ext', 'mp3'))
+        info.update(codecs)
+        return info

From 584715a803eef68f68fbbb8b72a022a699983197 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 9 Dec 2021 01:35:35 +0000
Subject: [PATCH 040/312] [applepodcasts] Extract default thumbnail image

---
 youtube_dl/extractor/applepodcasts.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
index dd413a289..95e0f663c 100644
--- a/youtube_dl/extractor/applepodcasts.py
+++ b/youtube_dl/extractor/applepodcasts.py
@@ -27,6 +27,7 @@ class ApplePodcastsIE(InfoExtractor):
             'timestamp': 1593932400,
             'duration': 6454,
             'series': 'The Tim Dillon Show',
+            'thumbnail': 're:.+[.](png|jpe?g|webp)',
         }
     }, {
         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
@@ -83,6 +84,7 @@ class ApplePodcastsIE(InfoExtractor):
             'timestamp': parse_iso8601(episode.get('releaseDateTime')),
             'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
             'series': series,
+            'thumbnail': self._og_search_thumbnail(webpage),
         }]
         self._sort_formats(info)
         info = info[0]

From 73e1ab6125eeea2b07942326cd2f1d6d9adff64e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Dec 2021 19:26:33 +0000
Subject: [PATCH 041/312] [test:download] Only extract enough videos for
 playlist_mincount

---
 test/parameters.json  | 1 -
 test/test_download.py | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/parameters.json b/test/parameters.json
index 65fd54428..864c9d130 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -18,7 +18,6 @@
     "noprogress": false, 
     "outtmpl": "%(id)s.%(ext)s", 
     "password": null, 
-    "playlistend": -1, 
     "playliststart": 1, 
     "prefer_free_formats": false, 
     "quiet": false, 
diff --git a/test/test_download.py b/test/test_download.py
index ebe820dfc..8e43cfa12 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -121,6 +121,7 @@ def generator(test_case, tname):
         params['outtmpl'] = tname + '_' + params['outtmpl']
         if is_playlist and 'playlist' not in test_case:
             params.setdefault('extract_flat', 'in_playlist')
+            params.setdefault('playlistend', test_case.get('playlist_mincount'))
             params.setdefault('skip_download', True)
 
         ydl = YoutubeDL(params, auto_init=False)

From 91278f4b6b5600e9ce65826ec9e7e38e7dba5937 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Dec 2021 20:52:21 +0000
Subject: [PATCH 042/312] [niconico] Back-port extractor from yt-dlp

Add Nico search extractors, fix extraction
---
 youtube_dl/extractor/extractors.py |   9 +-
 youtube_dl/extractor/niconico.py   | 646 +++++++++++++++++++++--------
 2 files changed, 477 insertions(+), 178 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4e9954c6a..e70daf2b1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -789,7 +789,14 @@ from .nick import (
     NickNightIE,
     NickRuIE,
 )
-from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .niconico import (
+    NiconicoIE,
+    NiconicoPlaylistIE,
+    NiconicoUserIE,
+    NicovideoSearchIE,
+    NicovideoSearchDateIE,
+    NicovideoSearchURLIE,
+)
 from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index a85fc3d5c..756ad0e25 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,25 +2,28 @@
 from __future__ import unicode_literals
 
 import datetime
-import functools
+import itertools
 import json
-import math
+import re
 
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from ..compat import (
     compat_parse_qs,
+    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
-    determine_ext,
-    dict_get,
     ExtractorError,
+    dict_get,
     float_or_none,
-    InAdvancePagedList,
     int_or_none,
+    OnDemandPagedList,
     parse_duration,
     parse_iso8601,
+    PostProcessingError,
     remove_start,
+    str_or_none,
     try_get,
     unified_timestamp,
     urlencode_postdata,
@@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.nicovideo.jp/watch/sm22312215',
-        'md5': 'd1a75c0823e2f629128c43e1212760f9',
+        'md5': 'a5bad06f1347452102953f323c69da34s',
         'info_dict': {
             'id': 'sm22312215',
             'ext': 'mp4',
@@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
 
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
     def _real_initialize(self):
         self._login()
 
@@ -191,37 +199,89 @@ class NiconicoIE(InfoExtractor):
             self._downloader.report_warning('unable to log in: bad username or password')
         return login_ok
 
-    def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
-        def yesno(boolean):
-            return 'yes' if boolean else 'no'
+    def _get_heartbeat_info(self, info_dict):
 
-        session_api_data = api_data['video']['dmcInfo']['session_api']
-        session_api_endpoint = session_api_data['urls'][0]
+        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
 
-        format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+        api_data = (
+            info_dict.get('_api_data')
+            or self._parse_json(
+                self._html_search_regex(
+                    'data-api-data="([^"]+)"',
+                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+                    'API data', default='{}'),
+                video_id))
+
+        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
+        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
+
+        def ping():
+            status = try_get(
+                self._download_json(
+                    'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
+                    query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
+                    note='Acquiring permission for downloading video',
+                    headers=self._API_HEADERS),
+                lambda x: x['meta']['status'])
+            if status != 200:
+                self.report_warning('Failed to acquire permission for playing video. The video may not download.')
+
+        yesno = lambda x: 'yes' if x else 'no'
+
+        # m3u8 (encryption)
+        if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
+            protocol = 'm3u8'
+            encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
+            session_api_http_parameters = {
+                'parameters': {
+                    'hls_parameters': {
+                        'encryption': {
+                            encryption: {
+                                'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
+                                'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
+                            }
+                        },
+                        'transfer_preset': '',
+                        'use_ssl': yesno(session_api_endpoint['isSsl']),
+                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+                        'segment_duration': 6000,
+                    }
+                }
+            }
+        # http
+        else:
+            protocol = 'http'
+            session_api_http_parameters = {
+                'parameters': {
+                    'http_output_download_parameters': {
+                        'use_ssl': yesno(session_api_endpoint['isSsl']),
+                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
+                    }
+                }
+            }
 
         session_response = self._download_json(
             session_api_endpoint['url'], video_id,
             query={'_format': 'json'},
             headers={'Content-Type': 'application/json'},
-            note='Downloading JSON metadata for %s' % format_id,
+            note='Downloading JSON metadata for %s' % info_dict['format_id'],
             data=json.dumps({
                 'session': {
                     'client_info': {
-                        'player_id': session_api_data['player_id'],
+                        'player_id': session_api_data.get('playerId'),
                     },
                     'content_auth': {
-                        'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
-                        'content_key_timeout': session_api_data['content_key_timeout'],
+                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
+                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                         'service_id': 'nicovideo',
-                        'service_user_id': session_api_data['service_user_id']
+                        'service_user_id': session_api_data.get('serviceUserId')
                     },
-                    'content_id': session_api_data['content_id'],
+                    'content_id': session_api_data.get('contentId'),
                     'content_src_id_sets': [{
                         'content_src_ids': [{
                             'src_id_to_mux': {
-                                'audio_src_ids': [audio_quality['id']],
-                                'video_src_ids': [video_quality['id']],
+                                'audio_src_ids': [audio_src_id],
+                                'video_src_ids': [video_src_id],
                             }
                         }]
                     }],
@@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
                     'content_uri': '',
                     'keep_method': {
                         'heartbeat': {
-                            'lifetime': session_api_data['heartbeat_lifetime']
+                            'lifetime': session_api_data.get('heartbeatLifetime')
                         }
                     },
-                    'priority': session_api_data['priority'],
+                    'priority': session_api_data.get('priority'),
                     'protocol': {
                         'name': 'http',
                         'parameters': {
-                            'http_parameters': {
-                                'parameters': {
-                                    'http_output_download_parameters': {
-                                        'use_ssl': yesno(session_api_endpoint['is_ssl']),
-                                        'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
-                                    }
-                                }
-                            }
+                            'http_parameters': session_api_http_parameters
                         }
                     },
-                    'recipe_id': session_api_data['recipe_id'],
+                    'recipe_id': session_api_data.get('recipeId'),
                     'session_operation_auth': {
                         'session_operation_auth_by_signature': {
-                            'signature': session_api_data['signature'],
-                            'token': session_api_data['token'],
+                            'signature': session_api_data.get('signature'),
+                            'token': session_api_data.get('token'),
                         }
                     },
                     'timing_constraint': 'unlimited'
                 }
             }).encode())
 
-        resolution = video_quality.get('resolution', {})
+        info_dict['url'] = session_response['data']['session']['content_uri']
+        info_dict['protocol'] = protocol
+
+        # get heartbeat info
+        heartbeat_info_dict = {
+            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
+            'data': json.dumps(session_response['data']),
+            # interval, convert milliseconds to seconds, then halve to make a buffer.
+            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
+            'ping': ping
+        }
+
+        return info_dict, heartbeat_info_dict
+
+    def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+        def parse_format_id(id_code):
+            mobj = re.match(r'''(?x)
+                    (?:archive_)?
+                    (?:(?P<codec>[^_]+)_)?
+                    (?:(?P<br>[\d]+)kbps_)?
+                    (?:(?P<res>[\d+]+)p_)?
+                ''', '%s_' % id_code)
+            return mobj.groupdict() if mobj else {}
+
+        protocol = 'niconico_dmc'
+        format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+        vdict = parse_format_id(video_quality['id'])
+        adict = parse_format_id(audio_quality['id'])
+        resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
+        vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
 
         return {
-            'url': session_response['data']['session']['content_uri'],
+            'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
             'format_id': format_id,
+            'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
             'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
-            'abr': float_or_none(audio_quality.get('bitrate'), 1000),
-            'vbr': float_or_none(video_quality.get('bitrate'), 1000),
-            'height': resolution.get('height'),
-            'width': resolution.get('width'),
+            'vcodec': vdict.get('codec'),
+            'acodec': adict.get('codec'),
+            'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
+            'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
+            'height': int_or_none(resolution.get('height', vdict.get('res'))),
+            'width': int_or_none(resolution.get('width')),
+            'quality': -2 if 'low' in format_id else -1,  # Default quality value is -1
+            'protocol': protocol,
+            'http_headers': {
+                'Origin': 'https://www.nicovideo.jp',
+                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
+            }
         }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        # Get video webpage. We are not actually interested in it for normal
-        # cases, but need the cookies in order to be able to download the
-        # info webpage
+        # Get video webpage for API data.
         webpage, handle = self._download_webpage_handle(
             'http://www.nicovideo.jp/watch/' + video_id, video_id)
         if video_id.startswith('so'):
@@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
             'data-api-data="([^"]+)"', webpage,
             'API data', default='{}'), video_id)
 
-        def _format_id_from_url(video_url):
-            return 'economy' if video_real_url.endswith('low') else 'normal'
+        def get_video_info_web(items):
+            return dict_get(api_data['video'], items)
 
-        try:
-            video_real_url = api_data['video']['smileInfo']['url']
-        except KeyError:  # Flash videos
-            # Get flv info
-            flv_info_webpage = self._download_webpage(
-                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
-                video_id, 'Downloading flv info')
+        # Get video info
+        video_info_xml = self._download_xml(
+            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+            video_id, note='Downloading video info page')
 
-            flv_info = compat_parse_qs(flv_info_webpage)
-            if 'url' not in flv_info:
-                if 'deleted' in flv_info:
-                    raise ExtractorError('The video has been deleted.',
-                                         expected=True)
-                elif 'closed' in flv_info:
-                    raise ExtractorError('Niconico videos now require logging in',
-                                         expected=True)
-                elif 'error' in flv_info:
-                    raise ExtractorError('%s reports error: %s' % (
-                        self.IE_NAME, flv_info['error'][0]), expected=True)
-                else:
-                    raise ExtractorError('Unable to find video URL')
+        def get_video_info_xml(items):
+            if not isinstance(items, list):
+                items = [items]
+            for item in items:
+                ret = xpath_text(video_info_xml, './/' + item)
+                if ret:
+                    return ret
 
-            video_info_xml = self._download_xml(
-                'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
-                video_id, note='Downloading video info page')
+        if get_video_info_xml('error'):
+            error_code = get_video_info_xml('code')
 
-            def get_video_info(items):
-                if not isinstance(items, list):
-                    items = [items]
-                for item in items:
-                    ret = xpath_text(video_info_xml, './/' + item)
-                    if ret:
-                        return ret
+            if error_code == 'DELETED':
+                raise ExtractorError('The video has been deleted.',
+                                     expected=True)
+            elif error_code == 'NOT_FOUND':
+                raise ExtractorError('The video is not found.',
+                                     expected=True)
+            elif error_code == 'COMMUNITY':
+                self.to_screen('%s: The video is community members only.' % video_id)
+            else:
+                raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
 
-            video_real_url = flv_info['url'][0]
+        # Start extracting video formats
+        formats = []
 
-            extension = get_video_info('movie_type')
-            if not extension:
-                extension = determine_ext(video_real_url)
+        # Get HTML5 videos info
+        quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
+        if not quality_info:
+            raise ExtractorError('The video can\'t be downloaded', expected=True)
 
-            formats = [{
-                'url': video_real_url,
-                'ext': extension,
-                'format_id': _format_id_from_url(video_real_url),
-            }]
-        else:
-            formats = []
+        for audio_quality in quality_info.get('audios') or {}:
+            for video_quality in quality_info.get('videos') or {}:
+                if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
+                    continue
+                formats.append(self._extract_format_for_quality(
+                    api_data, video_id, audio_quality, video_quality))
 
-            dmc_info = api_data['video'].get('dmcInfo')
-            if dmc_info:  # "New" HTML5 videos
-                quality_info = dmc_info['quality']
-                for audio_quality in quality_info['audios']:
-                    for video_quality in quality_info['videos']:
-                        if not audio_quality['available'] or not video_quality['available']:
-                            continue
-                        formats.append(self._extract_format_for_quality(
-                            api_data, video_id, audio_quality, video_quality))
+        # Get flv/swf info
+        timestamp = None
+        video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
+        if video_real_url:
+            is_economy = video_real_url.endswith('low')
 
-                self._sort_formats(formats)
-            else:  # "Old" HTML5 videos
-                formats = [{
+            if is_economy:
+                self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
+
+            # Invoking ffprobe to determine resolution
+            pp = FFmpegPostProcessor(self._downloader)
+            cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
+
+            self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
+
+            try:
+                metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
+            except PostProcessingError as err:
+                raise ExtractorError(err.msg, expected=True)
+
+            v_stream = a_stream = {}
+
+            # Some complex swf files doesn't have video stream (e.g. nm4809023)
+            for stream in metadata['streams']:
+                if stream['codec_type'] == 'video':
+                    v_stream = stream
+                elif stream['codec_type'] == 'audio':
+                    a_stream = stream
+
+            # Community restricted videos seem to have issues with the thumb API not returning anything at all
+            filesize = int(
+                (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
+                or metadata['format']['size']
+            )
+            extension = (
+                get_video_info_xml('movie_type')
+                or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
+            )
+
+            # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
+            timestamp = (
+                parse_iso8601(get_video_info_web('first_retrieve'))
+                or unified_timestamp(get_video_info_web('postedDateTime'))
+            )
+            metadata_timestamp = (
+                parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
+                or timestamp if extension != 'mp4' else 0
+            )
+
+            # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
+            smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
+
+            is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
+
+            # If movie file size is unstable, old server movie is not source movie.
+            if filesize > 1:
+                formats.append({
                     'url': video_real_url,
-                    'ext': 'mp4',
-                    'format_id': _format_id_from_url(video_real_url),
-                }]
+                    'format_id': 'smile' if not is_economy else 'smile_low',
+                    'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
+                    'ext': extension,
+                    'container': extension,
+                    'vcodec': v_stream.get('codec_name'),
+                    'acodec': a_stream.get('codec_name'),
+                    # Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
+                    'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
+                    'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
+                    'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
+                    'height': int_or_none(v_stream.get('height')),
+                    'width': int_or_none(v_stream.get('width')),
+                    'source_preference': 5 if not is_economy else -2,
+                    'quality': 5 if is_source and not is_economy else None,
+                    'filesize': filesize
+                })
 
-            def get_video_info(items):
-                return dict_get(api_data['video'], items)
+        self._sort_formats(formats)
 
         # Start extracting information
-        title = get_video_info('title')
-        if not title:
-            title = self._og_search_title(webpage, default=None)
-        if not title:
-            title = self._html_search_regex(
+        title = (
+            get_video_info_xml('title')  # prefer to get the untranslated original title
+            or get_video_info_web(['originalTitle', 'title'])
+            or self._og_search_title(webpage, default=None)
+            or self._html_search_regex(
                 r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
-                webpage, 'video title')
+                webpage, 'video title'))
 
         watch_api_data_string = self._html_search_regex(
             r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
@@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
         video_detail = watch_api_data.get('videoDetail', {})
 
         thumbnail = (
-            get_video_info(['thumbnail_url', 'thumbnailURL'])
+            self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
+            or dict_get(  # choose highest from 720p to 240p
+                get_video_info_web('thumbnail'),
+                ['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
             or self._html_search_meta('image', webpage, 'thumbnail', default=None)
             or video_detail.get('thumbnail'))
 
-        description = get_video_info('description')
+        description = get_video_info_web('description')
 
-        timestamp = (parse_iso8601(get_video_info('first_retrieve'))
-                     or unified_timestamp(get_video_info('postedDateTime')))
         if not timestamp:
             match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
             if match:
@@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
             timestamp = parse_iso8601(
                 video_detail['postedAt'].replace('/', '-'),
                 delimiter=' ', timezone=datetime.timedelta(hours=9))
+        timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
 
-        view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
+        view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
         if not view_count:
             match = self._html_search_regex(
                 r'>Views: <strong[^>]*>([^<]+)</strong>',
                 webpage, 'view count', default=None)
             if match:
                 view_count = int_or_none(match.replace(',', ''))
-        view_count = view_count or video_detail.get('viewCount')
+        view_count = (
+            view_count
+            or video_detail.get('viewCount')
+            or try_get(api_data, lambda x: x['video']['count']['view']))
+
+        comment_count = (
+            int_or_none(get_video_info_web('comment_num'))
+            or video_detail.get('commentCount')
+            or try_get(api_data, lambda x: x['video']['count']['comment']))
 
-        comment_count = (int_or_none(get_video_info('comment_num'))
-                         or video_detail.get('commentCount')
-                         or try_get(api_data, lambda x: x['thread']['commentCount']))
         if not comment_count:
             match = self._html_search_regex(
                 r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
                 comment_count = int_or_none(match.replace(',', ''))
 
         duration = (parse_duration(
-            get_video_info('length')
+            get_video_info_web('length')
             or self._html_search_meta(
                 'video:duration', webpage, 'video duration', default=None))
             or video_detail.get('length')
-            or get_video_info('duration'))
+            or get_video_info_web('duration'))
 
-        webpage_url = get_video_info('watch_url') or url
+        webpage_url = get_video_info_web('watch_url') or url
+
+        # for channel movie and community movie
+        channel_id = try_get(
+            api_data,
+            (lambda x: x['channel']['globalId'],
+             lambda x: x['community']['globalId']))
+        channel = try_get(
+            api_data,
+            (lambda x: x['channel']['name'],
+             lambda x: x['community']['name']))
 
         # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
         # in the JSON, which will cause None to be returned instead of {}.
         owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
-        uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
-        uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
+        uploader_id = str_or_none(
+            get_video_info_web(['ch_id', 'user_id'])
+            or owner.get('id')
+            or channel_id
+        )
+        uploader = (
+            get_video_info_web(['ch_name', 'user_nickname'])
+            or owner.get('nickname')
+            or channel
+        )
 
         return {
             'id': video_id,
+            '_api_data': api_data,
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
@@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
             'uploader': uploader,
             'timestamp': timestamp,
             'uploader_id': uploader_id,
+            'channel': channel,
+            'channel_id': channel_id,
             'view_count': view_count,
             'comment_count': comment_count,
             'duration': duration,
@@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
 
 
 class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -456,60 +623,185 @@ class NiconicoPlaylistIE(InfoExtractor):
         'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
         'only_matching': True,
     }]
-    _PAGE_SIZE = 100
 
-    def _call_api(self, list_id, resource, query):
-        return self._download_json(
-            'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
-            'Downloading %s JSON metatdata' % resource, query=query,
-            headers={'X-Frontend-Id': 6})['data']['mylist']
-
-    def _parse_owner(self, item):
-        owner = item.get('owner') or {}
-        if owner:
-            return {
-                'uploader': owner.get('name'),
-                'uploader_id': owner.get('id'),
-            }
-        return {}
-
-    def _fetch_page(self, list_id, page):
-        page += 1
-        items = self._call_api(list_id, 'page %d' % page, {
-            'page': page,
-            'pageSize': self._PAGE_SIZE,
-        })['items']
-        for item in items:
-            video = item.get('video') or {}
-            video_id = video.get('id')
-            if not video_id:
-                continue
-            count = video.get('count') or {}
-            get_count = lambda x: int_or_none(count.get(x))
-            info = {
-                '_type': 'url',
-                'id': video_id,
-                'title': video.get('title'),
-                'url': 'https://www.nicovideo.jp/watch/' + video_id,
-                'description': video.get('shortDescription'),
-                'duration': int_or_none(video.get('duration')),
-                'view_count': get_count('view'),
-                'comment_count': get_count('comment'),
-                'ie_key': NiconicoIE.ie_key(),
-            }
-            info.update(self._parse_owner(video))
-            yield info
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
 
     def _real_extract(self, url):
         list_id = self._match_id(url)
-        mylist = self._call_api(list_id, 'list', {
-            'pageSize': 1,
-        })
-        entries = InAdvancePagedList(
-            functools.partial(self._fetch_page, list_id),
-            math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
-            self._PAGE_SIZE)
-        result = self.playlist_result(
-            entries, list_id, mylist.get('name'), mylist.get('description'))
-        result.update(self._parse_owner(mylist))
-        return result
+
+        def get_page_data(pagenum, pagesize):
+            return self._download_json(
+                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
+                query={'page': 1 + pagenum, 'pageSize': pagesize},
+                headers=self._API_HEADERS).get('data').get('mylist')
+
+        data = get_page_data(0, 1)
+        title = data.get('name')
+        description = data.get('description')
+        uploader = data.get('owner').get('name')
+        uploader_id = data.get('owner').get('id')
+
+        def pagefunc(pagenum):
+            data = get_page_data(pagenum, 25)
+            return ({
+                '_type': 'url',
+                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
+            } for item in data.get('items'))
+
+        return {
+            '_type': 'playlist',
+            'id': list_id,
+            'title': title,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'entries': OnDemandPagedList(pagefunc, 25),
+        }
+
+
+class NicovideoSearchBaseIE(InfoExtractor):
+    _MAX_RESULTS = float('inf')
+
+    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
+        query = query or {}
+        pages = [query['page']] if 'page' in query else itertools.count(1)
+        for page_num in pages:
+            query['page'] = str(page_num)
+            webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
+            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.+?)(?=["\'])', webpage)
+            for item in results:
+                yield self.url_result('http://www.nicovideo.jp/watch/%s' % item, 'Niconico', item)
+            if not results:
+                break
+
+    def _get_n_results(self, query, n):
+        entries = self._entries(self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
+        if n < self._MAX_RESULTS:
+            entries = itertools.islice(entries, 0, n)
+        return self.playlist_result(entries, query, query)
+
+
+class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
+    IE_DESC = 'Nico video search'
+    IE_NAME = 'nicovideo:search'
+    _SEARCH_KEY = 'nicosearch'
+
+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
+
+
+class NicovideoSearchURLIE(NicovideoSearchBaseIE):
+    IE_NAME = '%s_url' % NicovideoSearchIE.IE_NAME
+    IE_DESC = 'Nico video search URLs'
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
+    _TESTS = [{
+        'url': 'http://www.nicovideo.jp/search/sm9',
+        'info_dict': {
+            'id': 'sm9',
+            'title': 'sm9'
+        },
+        'playlist_mincount': 40,
+    }, {
+        'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
+        'info_dict': {
+            'id': 'sm9',
+            'title': 'sm9'
+        },
+        'playlist_count': 31,
+    }]
+
+    def _real_extract(self, url):
+        query = self._match_id(url)
+        return self.playlist_result(self._entries(url, query), query, query)
+
+
+class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
+    IE_DESC = 'Nico video search, newest first'
+    IE_NAME = '%s:date' % NicovideoSearchIE.IE_NAME
+    _SEARCH_KEY = 'nicosearchdate'
+
+    _TESTS = [{
+        'url': 'nicosearchdateall:a',
+        'info_dict': {
+            'id': 'a',
+            'title': 'a'
+        },
+        'playlist_mincount': 1610,
+    }]
+
+    _START_DATE = datetime.date(2007, 1, 1)
+    _RESULTS_PER_PAGE = 32
+    _MAX_PAGES = 50
+
+    def _entries(self, url, item_id, start_date=None, end_date=None):
+        start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
+
+        # If the last page has a full page of videos, we need to break down the query interval further
+        last_page_len = len(list(self._get_entries_for_date(
+            url, item_id, start_date, end_date, self._MAX_PAGES,
+            note='Checking number of videos from {0} to {1}'.format(start_date, end_date))))
+        if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
+            midpoint = start_date + ((end_date - start_date) // 2)
+            for entry in itertools.chain(
+                    iter(self._entries(url, item_id, midpoint, end_date)),
+                    iter(self._entries(url, item_id, start_date, midpoint))):
+                yield entry
+        else:
+            self.to_screen('{0}: Downloading results from {1} to {2}'.format(item_id, start_date, end_date))
+            for entry in iter(self._get_entries_for_date(
+                    url, item_id, start_date, end_date, note='    Downloading page %(page)s')):
+                yield entry
+
+    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
+        query = {
+            'start': compat_str(start_date),
+            'end': compat_str(end_date or start_date),
+            'sort': 'f',
+            'order': 'd',
+        }
+        if page_num:
+            query['page'] = compat_str(page_num)
+
+        for entry in iter(super(NicovideoSearchDateIE, self)._entries(url, item_id, query=query, note=note)):
+            yield entry
+
+
+class NiconicoUserIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
+    _TEST = {
+        'url': 'https://www.nicovideo.jp/user/419948',
+        'info_dict': {
+            'id': '419948',
+        },
+        'playlist_mincount': 101,
+    }
+    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
+    _PAGE_SIZE = 100
+
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
+    def _entries(self, list_id):
+        total_count = 1
+        count = page_num = 0
+        while count < total_count:
+            json_parsed = self._download_json(
+                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
+                headers=self._API_HEADERS,
+                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+            if not page_num:
+                total_count = int_or_none(json_parsed['data'].get('totalCount'))
+            for entry in json_parsed["data"]["items"]:
+                count += 1
+                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
+            page_num += 1
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        return self.playlist_result(self._entries(list_id), list_id)

From 92d73ef3936ed6de9770f613fddf2260731becc9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 7 Dec 2021 23:30:30 +0000
Subject: [PATCH 043/312] [niconico] Implement heartbeat for download

---
 youtube_dl/downloader/__init__.py | 25 ++++++++----
 youtube_dl/downloader/niconico.py | 66 +++++++++++++++++++++++++++++++
 youtube_dl/extractor/niconico.py  | 18 +++++++++
 3 files changed, 101 insertions(+), 8 deletions(-)
 create mode 100644 youtube_dl/downloader/niconico.py

diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..d8f2fa342 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -1,22 +1,31 @@
 from __future__ import unicode_literals
 
+from ..utils import (
+    determine_protocol,
+)
+
+
+def get_suitable_downloader(info_dict, params={}):
+    info_dict['protocol'] = determine_protocol(info_dict)
+    info_copy = info_dict.copy()
+    return _get_suitable_downloader(info_copy, params)
+
+
+# Some of these require get_suitable_downloader
 from .common import FileDownloader
+from .dash import DashSegmentsFD
 from .f4m import F4mFD
 from .hls import HlsFD
 from .http import HttpFD
 from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
 from .rtsp import RtspFD
 from .ism import IsmFD
+from .niconico import NiconicoDmcFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
 )
 
-from ..utils import (
-    determine_protocol,
-)
-
 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
     'm3u8_native': HlsFD,
@@ -26,13 +35,12 @@ PROTOCOL_MAP = {
     'f4m': F4mFD,
     'http_dash_segments': DashSegmentsFD,
     'ism': IsmFD,
+    'niconico_dmc': NiconicoDmcFD,
 }
 
 
-def get_suitable_downloader(info_dict, params={}):
+def _get_suitable_downloader(info_dict, params={}):
     """Get the downloader class that can handle the info dict."""
-    protocol = determine_protocol(info_dict)
-    info_dict['protocol'] = protocol
 
     # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
     #     return FFmpegFD
@@ -43,6 +51,7 @@ def get_suitable_downloader(info_dict, params={}):
         if ed.can_download(info_dict):
             return ed
 
+    protocol = info_dict['protocol']
     if protocol.startswith('m3u8') and info_dict.get('is_live'):
         return FFmpegFD
 
diff --git a/youtube_dl/downloader/niconico.py b/youtube_dl/downloader/niconico.py
new file mode 100644
index 000000000..6392c9989
--- /dev/null
+++ b/youtube_dl/downloader/niconico.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+try:
+    import threading
+except ImportError:
+    threading = None
+
+from .common import FileDownloader
+from ..downloader import get_suitable_downloader
+from ..extractor.niconico import NiconicoIE
+from ..utils import sanitized_Request
+
+
+class NiconicoDmcFD(FileDownloader):
+    """ Downloading niconico douga from DMC with heartbeat """
+
+    FD_NAME = 'niconico_dmc'
+
+    def real_download(self, filename, info_dict):
+        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+        ie = NiconicoIE(self.ydl)
+        info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+        fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
+        for ph in self._progress_hooks:
+            fd.add_progress_hook(ph)
+
+        if not threading:
+            self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
+            return fd.real_download(filename, info_dict)
+
+        success = download_complete = False
+        timer = [None]
+        heartbeat_lock = threading.Lock()
+        heartbeat_url = heartbeat_info_dict['url']
+        heartbeat_data = heartbeat_info_dict['data'].encode()
+        heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+        request = sanitized_Request(heartbeat_url, heartbeat_data)
+
+        def heartbeat():
+            try:
+                self.ydl.urlopen(request).read()
+            except Exception:
+                self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+            with heartbeat_lock:
+                if not download_complete:
+                    timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+                    timer[0].start()
+
+        heartbeat_info_dict['ping']()
+        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+        try:
+            heartbeat()
+            if type(fd).__name__ == 'HlsFD':
+                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+            success = fd.real_download(filename, info_dict)
+        finally:
+            if heartbeat_lock:
+                with heartbeat_lock:
+                    timer[0].cancel()
+                    download_complete = True
+            return success
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 756ad0e25..93f813968 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -160,6 +160,24 @@ class NiconicoIE(InfoExtractor):
     }, {
         'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
         'only_matching': True,
+    }, {
+        # DMC video with heartbeat
+        'url': 'https://www.nicovideo.jp/watch/sm34815188',
+        'md5': '9360c6e1f1519d7759e2fe8e1326ae83',
+        'info_dict': {
+            'id': 'sm34815188',
+            'ext': 'mp4',
+            'title': 'md5:aee93e9f3366db72f902f6cd5d389cb7',
+            'description': 'md5:7b9149fc7a00ab053cafaf5c19662704',
+            'thumbnail': r're:https?://.*',
+            'uploader': 'md5:2762e18fa74dbb40aa1ad27c6291ee32',
+            'uploader_id': '67449889',
+            'upload_date': '20190322',
+            'timestamp': int,  # timestamp is unstable
+            'duration': 1082.0,
+            'view_count': int,
+            'comment_count': int,
+        },
     }]
 
     _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'

From 6d4932f02347bb1d0228b20798435930022bf316 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Sun, 18 Apr 2021 01:46:40 +0100
Subject: [PATCH 044/312] Try for timestamp, description from
 window.__INITIAL_DATA__ pages

---
 youtube_dl/extractor/bbc.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 247d982ce..37d427a66 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -1205,7 +1205,10 @@ class BBCIE(BBCCoUkIE):
                 if name == 'media-experience':
                     parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                 elif name == 'article':
-                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+                    for block in (try_get(resp,
+                                          (lambda x: x['data']['blocks'],
+                                           lambda x: x['data']['content']['model']['blocks'],),
+                                          list) or []):
                         if block.get('type') != 'media':
                             continue
                         parse_media(block.get('model'))

From 58babe9af79215bd6bdf07da0a8ebb1d3650e00b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 30 Nov 2021 05:15:33 +0000
Subject: [PATCH 045/312] Support __INITIAL_DATA__ with stringified JSON

Add test and fix test for bbcthreeConfig
---
 youtube_dl/extractor/bbc.py | 50 +++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 37d427a66..088af9823 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -12,6 +12,7 @@ from ..compat import (
     compat_HTTPError,
     compat_parse_qs,
     compat_str,
+    compat_urllib_error,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
@@ -395,9 +396,17 @@ class BBCCoUkIE(InfoExtractor):
                         formats.extend(self._extract_mpd_formats(
                             href, programme_id, mpd_id=format_id, fatal=False))
                     elif transfer_format == 'hls':
-                        formats.extend(self._extract_m3u8_formats(
-                            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
-                            m3u8_id=format_id, fatal=False))
+                        # TODO: let expected_status be passed into _extract_xxx_formats() instead
+                        try:
+                            fmts = self._extract_m3u8_formats(
+                                href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+                                m3u8_id=format_id, fatal=False)
+                        except ExtractorError as e:
+                            if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+                                    and e.exc_info[1].code in (403, 404)):
+                                raise
+                            fmts = []
+                        formats.extend(fmts)
                     elif transfer_format == 'hds':
                         formats.extend(self._extract_f4m_formats(
                             href, programme_id, f4m_id=format_id, fatal=False))
@@ -775,21 +784,33 @@ class BBCIE(BBCCoUkIE):
             'timestamp': 1437785037,
             'upload_date': '20150725',
         },
+    }, {
+        # video with window.__INITIAL_DATA__ and value as JSON string
+        'url': 'https://www.bbc.com/news/av/world-europe-59468682',
+        'info_dict': {
+            'id': 'p0b71qth',
+            'ext': 'mp4',
+            'title': 'Why France is making this woman a national hero',
+            'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1638230731,
+            'upload_date': '20211130',
+        },
     }, {
         # single video article embedded with data-media-vpid
         'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
         'only_matching': True,
     }, {
+        # bbcthreeConfig
         'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
         'info_dict': {
             'id': 'p06556y7',
             'ext': 'mp4',
-            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
-            'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+            'title': 'Things Not To Say to people that live on council estates',
+            'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
+            'duration': 360,
+            'thumbnail': r're:https?://.+/.+\.jpg',
         },
-        'params': {
-            'skip_download': True,
-        }
     }, {
         # window.__PRELOADED_STATE__
         'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
@@ -1162,9 +1183,16 @@ class BBCIE(BBCCoUkIE):
                 return self.playlist_result(
                     entries, playlist_id, playlist_title, playlist_description)
 
-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
+            'quoted preload state', default=None)
+        if initial_data is None:
+            initial_data = self._search_regex(
+                r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
+                'preload state', default={})
+        else:
+            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
+        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
         if initial_data:
             def parse_media(media):
                 if not media:

From c820a284a23438f065171b7e222024d01893a95f Mon Sep 17 00:00:00 2001
From: Abdullah Ibn Fulan <ibnfulan@tutanota.de>
Date: Tue, 17 Aug 2021 18:22:07 +0600
Subject: [PATCH 046/312] [extractor/audiomack] Updated URL regex, corrected
 invalid testcases, fixed bug

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/audiomack.py | 40 ++++++++++++++++---------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index cc7771354..638eb4041 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class AudiomackIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
     IE_NAME = 'audiomack'
     _TESTS = [
         # hosted on audiomack
@@ -29,25 +29,27 @@ class AudiomackIE(InfoExtractor):
             }
         },
         # audiomack wrapper around soundcloud song
+        # Needs new test URL.
         {
             'add_ie': ['Soundcloud'],
             'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
-            'info_dict': {
-                'id': '258901379',
-                'ext': 'mp3',
-                'description': 'mamba day freestyle for the legend Kobe Bryant ',
-                'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
-                'uploader': 'ILOVEMAKONNEN',
-                'upload_date': '20160414',
-            }
+            'only_matching': True,
+            # 'info_dict': {
+                # 'id': '258901379',
+                # 'ext': 'mp3',
+                # 'description': 'mamba day freestyle for the legend Kobe Bryant ',
+                # 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+                # 'uploader': 'ILOVEMAKONNEN',
+                # 'upload_date': '20160414',
+            # }
         },
     ]
 
     def _real_extract(self, url):
-        # URLs end with [uploader name]/[uploader title]
+        # URLs end with [uploader name]/song/[uploader title]
         # this title is whatever the user types in, and is rarely
         # the proper song title.  Real metadata is in the api response
-        album_url_tag = self._match_id(url)
+        album_url_tag = self._match_id(url).replace('/song/', '/')
 
         # Request the extended version of the api for extra fields like artist and title
         api_response = self._download_json(
@@ -79,7 +81,7 @@ class AudiomackAlbumIE(InfoExtractor):
         # Standard album playlist
         {
             'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
-            'playlist_count': 15,
+            'playlist_count': 11,
             'info_dict':
             {
                 'id': '812251',
@@ -95,24 +97,24 @@ class AudiomackAlbumIE(InfoExtractor):
             },
             'playlist': [{
                 'info_dict': {
-                    'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
-                    'id': '837577',
+                    'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
+                    'id': '837580',
                     'ext': 'mp3',
                     'uploader': 'Lil Herb a.k.a. G Herbo',
                 }
             }],
             'params': {
-                'playliststart': 9,
-                'playlistend': 9,
+                'playliststart': 2,
+                'playlistend': 2,
             }
         }
     ]
 
     def _real_extract(self, url):
-        # URLs end with [uploader name]/[uploader title]
+        # URLs end with [uploader name]/album/[uploader title]
         # this title is whatever the user types in, and is rarely
         # the proper song title.  Real metadata is in the api response
-        album_url_tag = self._match_id(url)
+        album_url_tag = self._match_id(url).replace('/album/', '/')
         result = {'_type': 'playlist', 'entries': []}
         # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
         # Therefore we don't know how many songs the album has and must infi-loop until failure
@@ -134,7 +136,7 @@ class AudiomackAlbumIE(InfoExtractor):
                 # Pull out the album metadata and add to result (if it exists)
                 for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
                     if apikey in api_response and resultkey not in result:
-                        result[resultkey] = api_response[apikey]
+                        result[resultkey] = compat_str(api_response[apikey])
                 song_id = url_basename(api_response['url']).rpartition('.')[0]
                 result['entries'].append({
                     'id': compat_str(api_response.get('id', song_id)),

From 16a3fe2ba6b4c86e60bca930253c81c8efdd676b Mon Sep 17 00:00:00 2001
From: Abdullah Ibn Fulan <54185653+abdullah-if@users.noreply.github.com>
Date: Tue, 17 Aug 2021 19:56:39 +0000
Subject: [PATCH 047/312] Updated Album URL regex

Mistakenly forgot to edit a line in last commit.

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/audiomack.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 638eb4041..4d1fbad1f 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -75,7 +75,7 @@ class AudiomackIE(InfoExtractor):
 
 
 class AudiomackAlbumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
     IE_NAME = 'audiomack:album'
     _TESTS = [
         # Standard album playlist

From ddc080a562cce984ac4a86969f511b1ae59421bf Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 18 Oct 2021 15:54:26 +0100
Subject: [PATCH 048/312] Add ArteTVCategoryIE to support category playlists

---
 youtube_dl/extractor/arte.py       | 47 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 48 insertions(+)

diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 03abdbfaf..5bfe57b10 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -12,6 +12,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
+    strip_or_none,
     try_get,
     unified_strdate,
     url_or_none,
@@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
         title = collection.get('title')
         description = collection.get('shortDescription') or collection.get('teaserText')
         return self.playlist_result(entries, playlist_id, title, description)
+
+
+class ArteTVCategoryIE(ArteTVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+    _TESTS = [{
+        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
+        'info_dict': {
+            'id': 'politics-and-society',
+            'title': 'Politics and society',
+            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+        },
+        'playlist_mincount': 13,
+    },
+    ]
+
+    @classmethod
+    def suitable(cls, url):
+        return (
+            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+            and super(ArteTVCategoryIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        lang, playlist_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, playlist_id)
+
+        items = []
+        for video in re.finditer(
+                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+                webpage):
+            video = video.group('url')
+            if video == url:
+                continue
+            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+                items.append(video)
+
+        if items:
+            title = (self._og_search_title(webpage, default=None)
+                     or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
+            title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+
+            result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
+            if result:
+                description = self._og_search_description(webpage, default=None)
+                if description:
+                    result['description'] = description
+                return result
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e70daf2b1..50b7cb4a0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -71,6 +71,7 @@ from .arte import (
     ArteTVIE,
     ArteTVEmbedIE,
     ArteTVPlaylistIE,
+    ArteTVCategoryIE,
 )
 from .arnes import ArnesIE
 from .asiancrush import (

From 734dfbb4e3ad4ee4d98609dc902ac864b94033a4 Mon Sep 17 00:00:00 2001
From: Seonghyeon Cho <seonghyeoncho96@gmail.com>
Date: Wed, 13 Oct 2021 20:27:40 +0900
Subject: [PATCH 049/312] Remove redundant assigning `format_id`

---
 youtube_dl/extractor/uol.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py
index 628adf219..59f8e5dc3 100644
--- a/youtube_dl/extractor/uol.py
+++ b/youtube_dl/extractor/uol.py
@@ -95,7 +95,6 @@ class UOLIE(InfoExtractor):
                 if v:
                     query[k] = v
             f_url = update_url_query(f_url, query)
-            format_id = format_id
             if format_id == 'HLS':
                 m3u8_formats = self._extract_m3u8_formats(
                     f_url, media_id, 'mp4', 'm3u8_native',

From 47b0c8697a39bbd64d5b922f81ad74ee4d2a3136 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 7 Feb 2022 13:28:21 +0000
Subject: [PATCH 050/312] [ARD] Back-port subtitle extraction from yt-dlp PR
 2409

Authored by: fstirlitz
Fixes #30543
Closes #17766 (thanks ngdio)
---
 youtube_dl/extractor/ard.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index d45a9fe52..a5b1f54d5 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -332,9 +332,24 @@ class ARDIE(InfoExtractor):
             formats.append(f)
         self._sort_formats(formats)
 
+        _SUB_FORMATS = (
+            ('./dataTimedText', 'ttml'),
+            ('./dataTimedTextNoOffset', 'ttml'),
+            ('./dataTimedTextVtt', 'vtt'),
+        )
+
+        subtitles = {}
+        for subsel, subext in _SUB_FORMATS:
+            for node in video_node.findall(subsel):
+                subtitles.setdefault('de', []).append({
+                    'url': node.attrib['url'],
+                    'ext': subext,
+                })
+
         return {
             'id': xpath_text(video_node, './videoId', default=display_id),
             'formats': formats,
+            'subtitles': subtitles,
             'display_id': display_id,
             'title': video_node.find('./title').text,
             'duration': parse_duration(video_node.find('./duration').text),

From 825d3426c56aabfc91aea139f2e6e0589f8096bc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 9 Feb 2022 02:40:34 +0000
Subject: [PATCH 051/312] [Nuvid] Use site JSON for video details (#29332)

Back-port yt-dlp PR 1022 onto PR #17890 and update

Video details aren't in the original HTML now but populated by async JS

Co-authored by: u-spec-png
Co-authored by: vidaritos
---
 youtube_dl/extractor/nuvid.py | 120 +++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 39 deletions(-)

diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py
index ab6bfcd7f..f6c94dd77 100644
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -1,71 +1,113 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     parse_duration,
+    int_or_none,
+    try_get,
+    url_or_none,
 )
 
+import re
+
 
 class NuvidIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://m.nuvid.com/video/1310741/',
-        'md5': 'eab207b7ac4fccfb4e23c86201f11277',
+    _TESTS = [{
+        'url': 'https://www.nuvid.com/video/6513023/italian-babe',
+        'md5': '772d2f8288f3d3c5c45f7a41761c7844',
         'info_dict': {
-            'id': '1310741',
+            'id': '6513023',
             'ext': 'mp4',
-            'title': 'Horny babes show their awesome bodeis and',
-            'duration': 129,
+            'title': 'italian babe',
+            'format_id': '360p',
+            'duration': 321.0,
             'age_limit': 18,
+            'thumbnail': r're:https?://.+\.jpg',
+            'thumbnails': list,
         }
-    }
+    }, {
+        'url': 'https://m.nuvid.com/video/6523263',
+        'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
+        'info_dict': {
+            'id': '6523263',
+            'ext': 'mp4',
+            'title': 'Slut brunette college student anal dorm',
+            'format_id': '720p',
+            'duration': 421.0,
+            'age_limit': 18,
+            'thumbnail': r're:https?://.+\.jpg',
+            'thumbnails': list,
+        }
+    }, {
+        'url': 'http://m.nuvid.com/video/6415801/',
+        'md5': '638d5ececb138d5753593f751ae3f697',
+        'info_dict': {
+            'id': '6415801',
+            'ext': 'mp4',
+            'title': 'My best friend wanted to fuck my wife for a long time',
+            'format_id': '720p',
+            'duration': 1882,
+            'age_limit': 18,
+            'thumbnail': r're:https?://.+\.jpg',
+            'thumbnails': list,
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        page_url = 'http://m.nuvid.com/video/%s' % video_id
+        qualities = {
+            'lq': '360p',
+            'hq': '720p',
+        }
+
+        json_url = 'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'.format(**locals())
+        video_data = self._download_json(
+            json_url, video_id, headers={
+                'Accept': 'application/json, text/javascript, */*; q = 0.01',
+                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+            }) or {}
+
+        # nice to have, not required
         webpage = self._download_webpage(
-            page_url, video_id, 'Downloading video page')
-        # When dwnld_speed exists and has a value larger than the MP4 file's
-        # bitrate, Nuvid returns the MP4 URL
-        # It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
-        self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
-        mp4_webpage = self._download_webpage(
-            page_url, video_id, 'Downloading video page for MP4 format')
+            'http://m.nuvid.com/video/%s' % (video_id, ),
+            video_id, 'Downloading video page', fatal=False) or ''
+
+        title = (
+            try_get(video_data, lambda x: x['title'], compat_str)
+            or self._html_search_regex(
+                (r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
+                 r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
+                 r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
+                webpage, 'title', group='title')).strip()
 
-        html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
-        video_url = self._html_search_regex(html5_video_re, webpage, video_id)
-        mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
         formats = [{
-            'url': video_url,
-        }]
-        if mp4_video_url != video_url:
-            formats.append({
-                'url': mp4_video_url,
-            })
+            'url': source,
+            'format_id': qualities.get(quality),
+            'height': int_or_none(qualities.get(quality)[:-1]),
+        } for quality, source in video_data.get('files').items() if source]
 
-        title = self._html_search_regex(
-            [r'<span title="([^"]+)">',
-             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
-             r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
+        self._check_formats(formats, video_id)
+        self._sort_formats(formats)
+
+        duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
         thumbnails = [
-            {
-                'url': thumb_url,
-            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
+            {'url': thumb_url, }
+            for thumb_url in (
+                url_or_none(src) for src in re.findall(
+                    r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>',
+                    webpage))
         ]
-        thumbnail = thumbnails[0]['url'] if thumbnails else None
-        duration = parse_duration(self._html_search_regex(
-            [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
-             r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
 
         return {
             'id': video_id,
+            'formats': formats,
             'title': title,
+            'thumbnail': url_or_none(video_data.get('poster')),
             'thumbnails': thumbnails,
-            'thumbnail': thumbnail,
             'duration': duration,
             'age_limit': 18,
-            'formats': formats,
         }

From 266b6ef18520f8de60fa143e154e4b12be12afb7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 9 Feb 2022 21:21:59 +0000
Subject: [PATCH 052/312] [BBC] Also allow PID with leading 'l' (live?)

---
 youtube_dl/extractor/bbc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 088af9823..378b52f4f 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -40,7 +40,7 @@ from ..utils import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
+    _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/

From 8ff961d10faed848009f9e2ec03fa390b486694d Mon Sep 17 00:00:00 2001
From: kikuyan <kikuyan@users.noreply.github.com>
Date: Thu, 23 Dec 2021 11:40:45 +0900
Subject: [PATCH 053/312] [extractor/videa] fix extraction in Py2

Fixes #30416
---
 youtube_dl/extractor/videa.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index ab2c15cde..bdb95891d 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -91,7 +91,7 @@ class VideaIE(InfoExtractor):
             k = S[(S[i] + S[j]) % 256]
             res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
 
-        return res.decode()
+        return res.decode('utf-8')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -121,7 +121,7 @@ class VideaIE(InfoExtractor):
                 compat_b64decode(b64_info), key), video_id)
 
         video = xpath_element(info, './video', 'video')
-        if not video:
+        if video is None:
             raise ExtractorError(xpath_element(
                 info, './error', fatal=True), expected=True)
         sources = xpath_element(

From 74f8cc48afa59e1a125f939c060b21654d29789c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 9 Feb 2022 04:37:28 +0000
Subject: [PATCH 054/312] [extractor/videa] Back-port from yt-dlp PRs 463+1028

Authored by: nyuszika7h
---
 youtube_dl/extractor/videa.py | 53 ++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index bdb95891d..4589e78a1 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -12,6 +12,7 @@ from ..utils import (
     mimetype2ext,
     parse_codecs,
     update_url_query,
+    urljoin,
     xpath_element,
     xpath_text,
 )
@@ -19,6 +20,7 @@ from ..compat import (
     compat_b64decode,
     compat_ord,
     compat_struct_pack,
+    compat_urlparse,
 )
 
 
@@ -45,10 +47,24 @@ class VideaIE(InfoExtractor):
         },
     }, {
         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
-        'only_matching': True,
+        'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
+        'info_dict': {
+            'id': 'jAHDWfWSJH5XuFhH',
+            'ext': 'mp4',
+            'title': 'Supercars előzés',
+            'thumbnail': r're:^https?://.*',
+            'duration': 64,
+        },
     }, {
         'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
-        'only_matching': True,
+        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+        'info_dict': {
+            'id': '8YfIAjxwWGwT8HVQ',
+            'ext': 'mp4',
+            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+            'thumbnail': r're:^https?://.*',
+            'duration': 21,
+        },
     }, {
         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
         'only_matching': True,
@@ -95,9 +111,16 @@ class VideaIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        query = {'v': video_id}
-        player_page = self._download_webpage(
-            'https://videa.hu/player', video_id, query=query)
+        video_page = self._download_webpage(url, video_id)
+
+        if 'videa.hu/player' in url:
+            player_url = url
+            player_page = video_page
+        else:
+            player_url = self._search_regex(
+                r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
+            player_url = urljoin(url, player_url)
+            player_page = self._download_webpage(player_url, video_id)
 
         nonce = self._search_regex(
             r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
@@ -107,6 +130,7 @@ class VideaIE(InfoExtractor):
         for i in range(0, 32):
             result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
 
+        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
         random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
         query['_s'] = random_seed
         query['_t'] = result[:16]
@@ -127,7 +151,7 @@ class VideaIE(InfoExtractor):
         sources = xpath_element(
             info, './video_sources', 'sources', fatal=True)
         hash_values = xpath_element(
-            info, './hash_values', 'hash values', fatal=True)
+            info, './hash_values', 'hash values', fatal=False)
 
         title = xpath_text(video, './title', fatal=True)
 
@@ -136,15 +160,16 @@ class VideaIE(InfoExtractor):
             source_url = source.text
             source_name = source.get('name')
             source_exp = source.get('exp')
-            if not (source_url and source_name and source_exp):
+            if not (source_url and source_name):
                 continue
-            hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
-            if not hash_value:
-                continue
-            source_url = update_url_query(source_url, {
-                'md5': hash_value,
-                'expires': source_exp,
-            })
+            hash_value = (
+                xpath_text(hash_values, 'hash_value_' + source_name)
+                if hash_values is not None else None)
+            if hash_value and source_exp:
+                source_url = update_url_query(source_url, {
+                    'md5': hash_value,
+                    'expires': source_exp,
+                })
             f = parse_codecs(source.get('codecs'))
             f.update({
                 'url': self._proto_relative_url(source_url),

From 29f7bfc4d7a80cecd67c19c25134481fbba6e175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Tue, 11 Jan 2022 17:56:18 +0100
Subject: [PATCH 055/312] [streamcz] cherry-pick from yt-dlp

Cherry-picked-from: 7d449fff5346 ("[streamcz] Fix extractor (#1616)")
---
 youtube_dl/extractor/streamcz.py | 157 ++++++++++++++++---------------
 1 file changed, 80 insertions(+), 77 deletions(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 58e0b4c80..0191c77de 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -1,105 +1,108 @@
 # coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import time
+import json
 
 from .common import InfoExtractor
 from ..utils import (
+    float_or_none,
     int_or_none,
-    sanitized_Request,
+    parse_codecs,
+    traverse_obj,
+    urljoin,
 )
 
 
-def _get_api_key(api_path):
-    if api_path.endswith('?'):
-        api_path = api_path[:-1]
-
-    api_key = 'fb5f58a820353bd7095de526253c14fd'
-    a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
-    return hashlib.md5(a.encode('ascii')).hexdigest()
-
-
 class StreamCZIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
-    _API_URL = 'http://www.stream.cz/API'
-
+    _VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
-        'md5': '934bb6a6d220d99c010783c9719960d5',
+        'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
+        'md5': '40c41ade1464a390a0b447e333df4239',
         'info_dict': {
-            'id': '765767',
+            'id': '57953890',
             'ext': 'mp4',
-            'title': 'Peklo na talíři: Éčka pro děti',
-            'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE',
-            'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
-            'duration': 256,
-        },
+            'title': 'Bůh',
+            'display_id': 'buh',
+            'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
+        }
     }, {
-        'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
-        'md5': '849a88c1e1ca47d41403c2ba5e59e261',
+        'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
+        'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
         'info_dict': {
-            'id': '10002447',
+            'id': '64147267',
             'ext': 'mp4',
-            'title': 'Kancelář Blaník: Tři roky pro Mazánka',
-            'description': 'md5:3862a00ba7bf0b3e44806b544032c859',
-            'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000',
-            'duration': 368,
-        },
+            'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
+            'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
+            'description': 'md5:1dcb5e010eb697dedc5942f76c5b3744',
+        }
     }]
 
+    def _extract_formats(self, spl_url, video):
+        for ext, pref, streams in (
+                ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))),
+                ('mp4', 1, video.get('mp4'))):
+            for format_id, stream in streams.items():
+                if not stream.get('url'):
+                    continue
+                yield {
+                    'format_id': f'{format_id}-{ext}',
+                    'ext': ext,
+                    'source_preference': pref,
+                    'url': urljoin(spl_url, stream['url']),
+                    'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
+                    'duration': float_or_none(stream.get('duration'), scale=1000),
+                    'width': traverse_obj(stream, ('resolution', 0)),
+                    'height': traverse_obj(stream, ('resolution', 1)) or int_or_none(format_id.replace('p', '')),
+                    **parse_codecs(stream.get('codec')),
+                }
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        api_path = '/episode/%s' % video_id
+        display_id, video_id = self._match_valid_url(url).groups()
 
-        req = sanitized_Request(self._API_URL + api_path)
-        req.add_header('Api-Password', _get_api_key(api_path))
-        data = self._download_json(req, video_id)
+        data = self._download_json(
+            'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
+            data=json.dumps({
+                'variables': {'urlName': video_id},
+                'query': '''
+                    query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
+                    fragment VideoDetailFragmentOnEpisode on Episode {
+                        id
+                        spl
+                        urlName
+                        name
+                        perex
+                        duration
+                        views
+                    }'''
+            }).encode('utf-8'),
+            headers={'Content-Type': 'application/json;charset=UTF-8'}
+        )['data']['episode']
 
-        formats = []
-        for quality, video in enumerate(data['video_qualities']):
-            for f in video['formats']:
-                typ = f['type'].partition('/')[2]
-                qlabel = video.get('quality_label')
-                formats.append({
-                    'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ,
-                    'format_id': '%s-%s' % (typ, f['quality']),
-                    'url': f['source'],
-                    'height': int_or_none(f['quality'].rstrip('p')),
-                    'quality': quality,
-                })
-        self._sort_formats(formats)
-
-        image = data.get('image')
-        if image:
-            thumbnail = self._proto_relative_url(
-                image.replace('{width}', '1240').replace('{height}', '697'),
-                scheme='http:',
-            )
-        else:
-            thumbnail = None
-
-        stream = data.get('_embedded', {}).get('stream:show', {}).get('name')
-        if stream:
-            title = '%s: %s' % (stream, data['name'])
-        else:
-            title = data['name']
+        spl_url = data['spl'] + 'spl2,3'
+        metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
+        if 'Location' in metadata and 'data' not in metadata:
+            spl_url = metadata['Location']
+            metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
+        video = metadata['data']
 
         subtitles = {}
-        srt_url = data.get('subtitles_srt')
-        if srt_url:
-            subtitles['cs'] = [{
-                'ext': 'srt',
-                'url': srt_url,
-            }]
+        for subs in video.get('subtitles', {}).values():
+            if not subs.get('language'):
+                continue
+            for ext, sub_url in subs.get('urls').items():
+                subtitles.setdefault(subs['language'], []).append({
+                    'ext': ext,
+                    'url': urljoin(spl_url, sub_url)
+                })
+
+        formats = list(self._extract_formats(spl_url, video))
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'formats': formats,
-            'description': data.get('web_site_text'),
-            'duration': int_or_none(data.get('duration')),
+            'display_id': display_id,
+            'title': data.get('name'),
+            'description': data.get('perex'),
+            'duration': float_or_none(data.get('duration')),
             'view_count': int_or_none(data.get('views')),
+            'formats': formats,
             'subtitles': subtitles,
         }

From 8088ce036ac4ce282f8f864c6b5f4f3987647221 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 11:55:13 +0100
Subject: [PATCH 056/312] revert: use _match_valid_url function

---
 youtube_dl/extractor/streamcz.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 0191c77de..998342e93 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -1,5 +1,6 @@
 # coding: utf-8
 import json
+import re
 
 from .common import InfoExtractor
 from ..utils import (
@@ -55,7 +56,7 @@ class StreamCZIE(InfoExtractor):
                 }
 
     def _real_extract(self, url):
-        display_id, video_id = self._match_valid_url(url).groups()
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
 
         data = self._download_json(
             'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',

From b1297308fb7b423a60c3a28c74ac014d7b385a2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 12:28:30 +0100
Subject: [PATCH 057/312] avoid traverse_obj function

---
 youtube_dl/extractor/streamcz.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 998342e93..fbdc44505 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -7,7 +7,6 @@ from ..utils import (
     float_or_none,
     int_or_none,
     parse_codecs,
-    traverse_obj,
     urljoin,
 )
 
@@ -38,7 +37,7 @@ class StreamCZIE(InfoExtractor):
 
     def _extract_formats(self, spl_url, video):
         for ext, pref, streams in (
-                ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))),
+                ('ts', -1, video.get('http_stream', {}).get('qualities', {})),
                 ('mp4', 1, video.get('mp4'))):
             for format_id, stream in streams.items():
                 if not stream.get('url'):
@@ -50,8 +49,8 @@ class StreamCZIE(InfoExtractor):
                     'url': urljoin(spl_url, stream['url']),
                     'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
                     'duration': float_or_none(stream.get('duration'), scale=1000),
-                    'width': traverse_obj(stream, ('resolution', 0)),
-                    'height': traverse_obj(stream, ('resolution', 1)) or int_or_none(format_id.replace('p', '')),
+                    'width': stream.get('resolution', 2 * [0])[0] or None,
+                    'height': stream.get('resolution', 2 * [0])[1] or int_or_none(format_id.replace('p', '')),
                     **parse_codecs(stream.get('codec')),
                 }
 

From d02064218be76eba6350a13ccbbc473b1b439570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 12:30:29 +0100
Subject: [PATCH 058/312] do not use f-strings

---
 youtube_dl/extractor/streamcz.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index fbdc44505..d1736c023 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -43,7 +43,7 @@ class StreamCZIE(InfoExtractor):
                 if not stream.get('url'):
                     continue
                 yield {
-                    'format_id': f'{format_id}-{ext}',
+                    'format_id': '{}-{}'.format(format_id, ext),
                     'ext': ext,
                     'source_preference': pref,
                     'url': urljoin(spl_url, stream['url']),

From d8adca1b664fceb07f2b28b55c7e1855407296ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 13:13:20 +0100
Subject: [PATCH 059/312] [streamcz] test fixes and one additional test

---
 youtube_dl/extractor/streamcz.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index d1736c023..60e770448 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -22,6 +22,20 @@ class StreamCZIE(InfoExtractor):
             'title': 'Bůh',
             'display_id': 'buh',
             'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
+            'duration': 1369.6,
+            'view_count': int,
+        }
+    }, {
+        'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
+        'md5': '41fd358000086a1ccdb068c77809b158',
+        'info_dict': {
+            'id': '64087937',
+            'ext': 'mp4',
+            'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
+            'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
+            'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
+            'duration': 50.2,
+            'view_count': int,
         }
     }, {
         'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
@@ -31,7 +45,9 @@ class StreamCZIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
             'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
-            'description': 'md5:1dcb5e010eb697dedc5942f76c5b3744',
+            'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
+            'duration': 442.84,
+            'view_count': int,
         }
     }]
 

From 85bf26c1d01f94b83476703e5c70022f01164ccf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 15:02:08 +0100
Subject: [PATCH 060/312] resolve problem with unpacking operator for <py3.5

---
 youtube_dl/extractor/streamcz.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 60e770448..179bdcaba 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     float_or_none,
     int_or_none,
+    merge_dicts,
     parse_codecs,
     urljoin,
 )
@@ -58,7 +59,7 @@ class StreamCZIE(InfoExtractor):
             for format_id, stream in streams.items():
                 if not stream.get('url'):
                     continue
-                yield {
+                yield merge_dicts({
                     'format_id': '{}-{}'.format(format_id, ext),
                     'ext': ext,
                     'source_preference': pref,
@@ -67,8 +68,7 @@ class StreamCZIE(InfoExtractor):
                     'duration': float_or_none(stream.get('duration'), scale=1000),
                     'width': stream.get('resolution', 2 * [0])[0] or None,
                     'height': stream.get('resolution', 2 * [0])[1] or int_or_none(format_id.replace('p', '')),
-                    **parse_codecs(stream.get('codec')),
-                }
+                }, parse_codecs(stream.get('codec')))
 
     def _real_extract(self, url):
         display_id, video_id = re.match(self._VALID_URL, url).groups()

From bf23bc0489cf304b2a8ab756f2f63b2cfa5586fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20Van=C4=9Bk?= <arkamar@atlas.cz>
Date: Sat, 12 Feb 2022 15:27:10 +0100
Subject: [PATCH 061/312] add missing __future__ import unicode_literals

---
 youtube_dl/extractor/streamcz.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 179bdcaba..060ba32e0 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import json
 import re
 

From 34722270741fb9c06f978861c1e5f503291070d8 Mon Sep 17 00:00:00 2001
From: Vladimir Stavrinov <9163352+vstavrinov@users.noreply.github.com>
Date: Mon, 14 Feb 2022 20:54:31 +0300
Subject: [PATCH 062/312] [rutv] fix vbr for empty string value (#30623)

* [rutv] use str_to_int() (thx dirkf)
---
 youtube_dl/extractor/rutv.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py
index d2713c19a..05f319396 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -6,7 +6,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    int_or_none
+    int_or_none,
+    str_to_int
 )
 
 
@@ -179,7 +180,7 @@ class RUTVIE(InfoExtractor):
                         'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
                         'rtmp_live': True,
                         'ext': 'flv',
-                        'vbr': int(quality),
+                        'vbr': str_to_int(quality),
                         'preference': preference,
                     }
                 elif transport == 'm3u8':

From 782bfd26dbebea60e35f58ab18e218bedbecb782 Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)" <nao20010128@gmail.com>
Date: Thu, 24 Feb 2022 22:34:32 +0900
Subject: [PATCH 063/312] [bigo] add support for bigo.tv (#30635)

* [bigo] add support for bigo.tv

* [bigo] prepend "Bigo says"

* title fallback

* add error for invalid json data
---
 youtube_dl/extractor/bigo.py       | 59 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 60 insertions(+)
 create mode 100644 youtube_dl/extractor/bigo.py

diff --git a/youtube_dl/extractor/bigo.py b/youtube_dl/extractor/bigo.py
new file mode 100644
index 000000000..ddf76ac55
--- /dev/null
+++ b/youtube_dl/extractor/bigo.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, urlencode_postdata
+
+
+class BigoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'https://www.bigo.tv/ja/221338632',
+        'info_dict': {
+            'id': '6576287577575737440',
+            'title': '土よ〜💁‍♂️ 休憩室/REST room',
+            'thumbnail': r're:https?://.+',
+            'uploader': '✨Shin💫',
+            'uploader_id': '221338632',
+            'is_live': True,
+        },
+        'skip': 'livestream',
+    }, {
+        'url': 'https://www.bigo.tv/th/Tarlerm1304',
+        'only_matching': True,
+    }, {
+        'url': 'https://bigo.tv/115976881',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        info_raw = self._download_json(
+            'https://bigo.tv/studio/getInternalStudioInfo',
+            user_id, data=urlencode_postdata({'siteId': user_id}))
+
+        if not isinstance(info_raw, dict):
+            raise ExtractorError('Received invalid JSON data')
+        if info_raw.get('code'):
+            raise ExtractorError(
+                'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
+        info = info_raw.get('data') or {}
+
+        if not info.get('alive'):
+            raise ExtractorError('This user is offline.', expected=True)
+
+        return {
+            'id': info.get('roomId') or user_id,
+            'title': info.get('roomTopic') or info.get('nick_name') or user_id,
+            'formats': [{
+                'url': info.get('hls_src'),
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+            }],
+            'thumbnail': info.get('snapshot'),
+            'uploader': info.get('nick_name'),
+            'uploader_id': user_id,
+            'is_live': True,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 50b7cb4a0..c73c4cd6c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -115,6 +115,7 @@ from .bfmtv import (
 )
 from .bibeltv import BibelTVIE
 from .bigflix import BigflixIE
+from .bigo import BigoIE
 from .bild import BildIE
 from .bilibili import (
     BiliBiliIE,

From 923292ba643bf2a5c1fade797bd87a0de4f58d25 Mon Sep 17 00:00:00 2001
From: marieell <marieell@tuta.io>
Date: Thu, 10 Feb 2022 10:36:24 +0100
Subject: [PATCH 064/312] [aliexpress] Fix test case

---
 youtube_dl/extractor/aliexpress.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index 6f241e683..9722fe9ac 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
             'id': '2800002704436634',
             'ext': 'mp4',
             'title': 'CASIMA7.22',
-            'thumbnail': r're:http://.*\.jpg',
+            'thumbnail': r're:https?://.*\.jpg',
             'uploader': 'CASIMA Official Store',
             'timestamp': 1500717600,
             'upload_date': '20170722',

From 1f13ccfd7fcafbfd79ddd652967e02f9eda7ce79 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 24 Feb 2022 18:26:58 +0000
Subject: [PATCH 065/312] Fixed groups() call on potentially empty regex search
 object (#30676)

* Fixed groups() call on potentially empty regex search object.
- https://github.com/ytdl-org/youtube-dl/issues/30521

* minimising lines changed

Co-authored-by: yayorbitgum <50963144+yayorbitgum@users.noreply.github.com>
---
 youtube_dl/extractor/myspass.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index db7ebc94c..f540c52ee 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -35,7 +35,9 @@ class MySpassIE(InfoExtractor):
         title = xpath_text(metadata, 'title', fatal=True)
         video_url = xpath_text(metadata, 'url_flv', 'download url', True)
         video_id_int = int(video_id)
-        for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
+
+        grps = re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url)
+        for group in grps.groups() if grps else []:
             group_int = int(group)
             if group_int > video_id_int:
                 video_url = video_url.replace(

From c4d1738316db45e03e0625650b3550334b66ab7f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 24 Feb 2022 09:16:16 +0000
Subject: [PATCH 066/312] [CPAC] Add extractor for Canadian Parliament

CPACIE: single episode
CPACPlaylistIE: playlists and searches
---
 youtube_dl/extractor/cpac.py       | 148 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   4 +
 2 files changed, 152 insertions(+)
 create mode 100644 youtube_dl/extractor/cpac.py

diff --git a/youtube_dl/extractor/cpac.py b/youtube_dl/extractor/cpac.py
new file mode 100644
index 000000000..22741152c
--- /dev/null
+++ b/youtube_dl/extractor/cpac.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    try_get,
+    unified_timestamp,
+    update_url_query,
+    urljoin,
+)
+
+# compat_range
+try:
+    if callable(xrange):
+        range = xrange
+except (NameError, TypeError):
+    pass
+
+
+class CPACIE(InfoExtractor):
+    IE_NAME = 'cpac'
+    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
+    _TEST = {
+        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
+        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
+        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
+        'info_dict': {
+            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
+            'ext': 'mp4',
+            'upload_date': '20220215',
+            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
+            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
+            'timestamp': 1644901200,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'hls_prefer_native': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        url_lang = 'fr' if '/l-episode?' in url else 'en'
+
+        content = self._download_json(
+            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
+            video_id)
+        video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str)
+        formats = []
+        if video_url:
+            content = content['page']
+            title = str_or_none(content['details']['title_%s_t' % (url_lang, )])
+            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
+            for fmt in formats:
+                # prefer language to match URL
+                fmt_lang = fmt.get('language')
+                if fmt_lang == url_lang:
+                    fmt['language_preference'] = 10
+                elif not fmt_lang:
+                    fmt['language_preference'] = -1
+                else:
+                    fmt['language_preference'] = -10
+
+        self._sort_formats(formats)
+
+        category = str_or_none(content['details']['category_%s_t' % (url_lang, )])
+
+        def is_live(v_type):
+            return (v_type == 'live') if v_type is not None else None
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
+            'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
+            'category': [category] if category else None,
+            'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
+            'is_live': is_live(content['details'].get('type')),
+        }
+
+
+class CPACPlaylistIE(InfoExtractor):
+    IE_NAME = 'cpac:playlist'
+    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'
+
+    _TESTS = [{
+        'url': 'https://www.cpac.ca/program?id=6',
+        'info_dict': {
+            'id': 'id=6',
+            'title': 'Headline Politics',
+            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
+        },
+        'playlist_count': 10,
+    }, {
+        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
+        'info_dict': {
+            'id': 'key=hudson',
+            'title': 'hudson',
+        },
+        'playlist_count': 22,
+    }, {
+        'url': 'https://www.cpac.ca/search?programId=50',
+        'info_dict': {
+            'id': 'programId=50',
+            'title': '50',
+        },
+        'playlist_count': 9,
+    }, {
+        'url': 'https://www.cpac.ca/emission?id=6',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
+        pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
+        api_url = (
+            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
+            % (pl_type, video_id, ))
+        content = self._download_json(api_url, video_id)
+        entries = []
+        total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
+        for page in range(1, total_pages + 1):
+            if page > 1:
+                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
+                content = self._download_json(
+                    api_url, video_id,
+                    note='Downloading continuation - %d' % (page, ),
+                    fatal=False)
+
+            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
+                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
+                if episode_url:
+                    entries.append(episode_url)
+
+        return self.playlist_result(
+            (self.url_result(entry) for entry in entries),
+            playlist_id=video_id,
+            playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
+            playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
+        )
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c73c4cd6c..7c99cb7e0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -255,6 +255,10 @@ from .commonprotocols import (
 from .condenast import CondeNastIE
 from .contv import CONtvIE
 from .corus import CorusIE
+from .cpac import (
+    CPACIE,
+    CPACPlaylistIE,
+)
 from .cracked import CrackedIE
 from .crackle import CrackleIE
 from .crooksandliars import CrooksAndLiarsIE

From f8e543c9063c1c7ad157936cb6a15b428ddb3896 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 7 Feb 2022 20:06:27 +0000
Subject: [PATCH 067/312] [Alsace20TV] Add new extractors Alsace20TVIE,
 Alsace20TVEmbedIE

---
 youtube_dl/extractor/alsace20tv.py | 89 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  4 ++
 2 files changed, 93 insertions(+)
 create mode 100644 youtube_dl/extractor/alsace20tv.py

diff --git a/youtube_dl/extractor/alsace20tv.py b/youtube_dl/extractor/alsace20tv.py
new file mode 100644
index 000000000..228cec3ec
--- /dev/null
+++ b/youtube_dl/extractor/alsace20tv.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    dict_get,
+    get_element_by_class,
+    int_or_none,
+    unified_strdate,
+    url_or_none,
+)
+
+
+class Alsace20TVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
+    _TESTS = [{
+        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
+        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+        'info_dict': {
+            'id': 'lyNHCXpYJh',
+            'ext': 'mp4',
+            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
+            'title': 'Votre JT du jeudi 3 février',
+            'upload_date': '20220203',
+            'thumbnail': r're:https?://.+\.jpg',
+            'duration': 1073,
+            'view_count': int,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
+
+    def _extract_video(self, video_id, url=None):
+        info = self._download_json(
+            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
+            video_id) or {}
+        title = info['titre']
+
+        formats = []
+        for res, fmt_url in (info.get('files') or {}).items():
+            formats.extend(
+                self._extract_smil_formats(fmt_url, video_id, fatal=False)
+                if '/smil:_' in fmt_url
+                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
+        self._sort_formats(formats)
+
+        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
+        thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
+        upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
+        upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+            'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
+            'view_count': int_or_none(info.get('nb_vues')),
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self._extract_video(video_id, url)
+
+
+class Alsace20TVEmbedIE(Alsace20TVIE):
+    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
+    _TESTS = [{
+        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
+        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+        'info_dict': {
+            'id': 'lyNHCXpYJh',
+            'ext': 'mp4',
+            'title': 'Votre JT du jeudi 3 février',
+            'upload_date': '20220203',
+            'thumbnail': r're:https?://.+\.jpg',
+            'view_count': int,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self._extract_video(video_id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 7c99cb7e0..535080d0a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -51,6 +51,10 @@ from .anvato import AnvatoIE
 from .aol import AolIE
 from .allocine import AllocineIE
 from .aliexpress import AliExpressLiveIE
+from .alsace20tv import (
+    Alsace20TVIE,
+    Alsace20TVEmbedIE,
+)
 from .apa import APAIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE

From 4194d253c0b922addf0439228066cb4fb487bac3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 30 Jul 2021 12:58:19 +0100
Subject: [PATCH 068/312] Avoid skipping ID when unlisted_hash is numeric

Pattern needed a non-greedy match; also replaced a redundant test with one for this, issue 29690
---
 youtube_dl/extractor/vimeo.py | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 0b386f450..a66912502 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -271,7 +271,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                         )?
                         vimeo(?:pro)?\.com/
                         (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
-                        (?:.*?/)?
+                        (?:.*?/)??
                         (?:
                             (?:
                                 play_redirect_hls|
@@ -517,14 +517,28 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'url': 'https://vimeo.com/7809605',
             'only_matching': True,
         },
-        {
-            'url': 'https://vimeo.com/160743502/abd0e13fb4',
-            'only_matching': True,
-        },
         {
             # requires passing unlisted_hash(a52724358e) to load_download_config request
             'url': 'https://vimeo.com/392479337/a52724358e',
             'only_matching': True,
+        },
+        {
+            # similar, but all numeric: ID must be 581039021, not 9603038895
+            # issue #29690
+            'url': 'https://vimeo.com/581039021/9603038895',
+            'info_dict': {
+                'id': '581039021',
+                # these have to be provided but we don't care
+                'ext': 'mp4',
+                'timestamp': 1627621014,
+                'title': 're:.+',
+                'uploader_id': 're:.+',
+                'uploader': 're:.+',
+                'upload_date': r're:\d+',
+            },
+            'params': {
+                'skip_download': True,
+            },
         }
         # https://gettingthingsdone.com/workflowmap/
         # vimeo embed with check-password page protected by Referer header

From 6508688e88c83bb811653083db9351702cd39a6a Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Sun, 1 Aug 2021 09:42:57 +0100
Subject: [PATCH 069/312] Make default upload_/release_date a compat_str

Ensures download tests pass in Python 2 as well as 3; also
add YoutubeDL tests for timestamp -> upload_date etc.
---
 test/test_YoutubeDL.py  | 19 +++++++++++++++++++
 youtube_dl/YoutubeDL.py |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index a35effe0e..f8c8e619c 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -997,6 +997,25 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(downloaded['extractor'], 'Video')
         self.assertEqual(downloaded['extractor_key'], 'Video')
 
+    def test_default_times(self):
+        """Test addition of missing upload/release/_date from /release_/timestamp"""
+        info = {
+            'id': '1234',
+            'url': TEST_URL,
+            'title': 'Title',
+            'ext': 'mp4',
+            'timestamp': 1631352900,
+            'release_timestamp': 1632995931,
+        }
+
+        params = {'simulate': True, }
+        ydl = FakeYDL(params)
+        out_info = ydl.process_ie_result(info)
+        self.assertTrue(isinstance(out_info['upload_date'], compat_str))
+        self.assertEqual(out_info['upload_date'], '20210911')
+        self.assertTrue(isinstance(out_info['release_date'], compat_str))
+        self.assertEqual(out_info['release_date'], '20210930')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index fe30758ef..69736acff 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1529,7 +1529,7 @@ class YoutubeDL(object):
                 # see http://bugs.python.org/issue1646728)
                 try:
                     upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
-                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                    info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
                 except (ValueError, OverflowError, OSError):
                     pass
 

From 49c5293014bc11ec8c009856cd63cffa6296c1e1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 22 Feb 2022 11:24:06 +0000
Subject: [PATCH 070/312] Ignore --external-downloader-args if
 --external-downloader was rejected

... and generate warning
---
 youtube_dl/YoutubeDL.py           | 11 ++++++++++-
 youtube_dl/downloader/__init__.py |  3 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 69736acff..019e309cb 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1906,8 +1906,17 @@ class YoutubeDL(object):
 
         if not self.params.get('skip_download', False):
             try:
+                def checked_get_suitable_downloader(info_dict, params):
+                    ed_args = params.get('external_downloader_args')
+                    dler = get_suitable_downloader(info_dict, params)
+                    if ed_args and not params.get('external_downloader_args'):
+                        # external_downloader_args was cleared because external_downloader was rejected
+                        self.report_warning('Requested external downloader cannot be used: '
+                                            'ignoring --external-downloader-args.')
+                    return dler
+
                 def dl(name, info):
-                    fd = get_suitable_downloader(info, self.params)(self, self.params)
+                    fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
                     for ph in self._progress_hooks:
                         fd.add_progress_hook(ph)
                     if self.params.get('verbose'):
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index d8f2fa342..d701d6292 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -50,6 +50,9 @@ def _get_suitable_downloader(info_dict, params={}):
         ed = get_external_downloader(external_downloader)
         if ed.can_download(info_dict):
             return ed
+        # Avoid using unwanted args since external_downloader was rejected
+        if params.get('external_downloader_args'):
+            params['external_downloader_args'] = None
 
     protocol = info_dict['protocol']
     if protocol.startswith('m3u8') and info_dict.get('is_live'):

From 17d295a1ec6d04362740dd8a0c583690f5ba082a Mon Sep 17 00:00:00 2001
From: lihan7 <lihan7@xiaomi.com>
Date: Fri, 25 Mar 2022 15:46:28 +0800
Subject: [PATCH 071/312] [extractor/bilibili] Fix path "/audio/auxxxxx"
 download return 403

---
 youtube_dl/extractor/bilibili.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index bff6ea194..d42f0e98a 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
             'filesize': int_or_none(play_data.get('size')),
         }]
 
+        for a_format in formats:
+            a_format.setdefault('http_headers', {}).update({
+                'Referer': url,
+            })
+
         song = self._call_api('song/info', au_id)
         title = song['title']
         statistic = song.get('statistic') or {}

From 9e5ca66f16998eb2a680e23a6e769e34001898c5 Mon Sep 17 00:00:00 2001
From: nixxo <nixxo@protonmail.com>
Date: Mon, 4 Jan 2021 15:11:47 +0100
Subject: [PATCH 072/312] [RAI] Added checks for DRM protected content (PR
 #27657)

reviewed by pukkandan (https://github.com/yt-dlp/yt-dlp/pull/150)
---
 youtube_dl/extractor/rai.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 67b86fc72..2abe164e0 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -158,6 +158,10 @@ class RaiPlayIE(RaiBaseIE):
         # subtitles at 'subtitlesArray' key (see #27698)
         'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
         'only_matching': True,
+    }, {
+        # DRM protected
+        'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -166,6 +170,13 @@ class RaiPlayIE(RaiBaseIE):
         media = self._download_json(
             base + '.json', video_id, 'Downloading video JSON')
 
+        if try_get(
+                media,
+                (lambda x: x['rights_management']['rights']['drm'],
+                 lambda x: x['program_info']['rights_management']['rights']['drm']),
+                dict):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         title = media['name']
 
         video = media['video']

From 1f50a07771fddb5f64617617d156bfdd593f951e Mon Sep 17 00:00:00 2001
From: nixxo <nixxo@protonmail.com>
Date: Wed, 27 Jan 2021 12:24:50 +0100
Subject: [PATCH 073/312] [RAI] Extend formats with direct http mp4 link (PR
 #27990)

* initial support for creating direct mp4 link
* improved regexes and info extraction
* added "connection: close" to request headers
* updated to https://github.com/yt-dlp/yt-dlp/pull/208
---
 youtube_dl/extractor/rai.py | 111 +++++++++++++++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 2abe164e0..7b0315a62 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -5,15 +5,16 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
     compat_str,
+    compat_urlparse,
 )
 from ..utils import (
-    ExtractorError,
     determine_ext,
+    ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     parse_duration,
     remove_start,
@@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor):
         if not formats and geoprotection is True:
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
+        formats.extend(self._create_http_urls(relinker_url, formats))
+
         return dict((k, v) for k, v in {
             'is_live': is_live,
             'duration': duration,
             'formats': formats,
         }.items() if v is not None)
 
+    def _create_http_urls(self, relinker_url, fmts):
+        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+        _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
+        _QUALITY = {
+            # tbr: w, h
+            '250': [352, 198],
+            '400': [512, 288],
+            '700': [512, 288],
+            '800': [700, 394],
+            '1200': [736, 414],
+            '1800': [1024, 576],
+            '2400': [1280, 720],
+            '3200': [1440, 810],
+            '3600': [1440, 810],
+            '5000': [1920, 1080],
+            '10000': [1920, 1080],
+        }
+
+        def test_url(url):
+            resp = self._request_webpage(
+                HEADRequest(url), None, headers={'User-Agent': 'Rai'},
+                fatal=False, errnote=False, note=False)
+
+            if resp is False:
+                return False
+
+            if resp.code == 200:
+                return False if resp.url == url else resp.url
+            return None
+
+        def get_format_info(tbr):
+            import math
+            br = int_or_none(tbr)
+            if len(fmts) == 1 and not br:
+                br = fmts[0].get('tbr')
+            if br > 300:
+                tbr = compat_str(math.floor(br / 100) * 100)
+            else:
+                tbr = '250'
+
+            # try extracting info from available m3u8 formats
+            format_copy = None
+            for f in fmts:
+                if f.get('tbr'):
+                    br_limit = math.floor(br / 100)
+                    if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
+                        format_copy = f.copy()
+            return {
+                'width': format_copy.get('width'),
+                'height': format_copy.get('height'),
+                'tbr': format_copy.get('tbr'),
+                'vcodec': format_copy.get('vcodec'),
+                'acodec': format_copy.get('acodec'),
+                'fps': format_copy.get('fps'),
+                'format_id': 'https-%s' % tbr,
+            } if format_copy else {
+                'width': _QUALITY[tbr][0],
+                'height': _QUALITY[tbr][1],
+                'format_id': 'https-%s' % tbr,
+                'tbr': int(tbr),
+            }
+
+        loc = test_url(_MP4_TMPL % (relinker_url, '*'))
+        if not isinstance(loc, compat_str):
+            return []
+
+        mobj = re.match(
+            _RELINKER_REG,
+            test_url(relinker_url) or '')
+        if not mobj:
+            return []
+
+        available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
+        available_qualities = [i for i in available_qualities if i]
+
+        formats = []
+        for q in available_qualities:
+            fmt = {
+                'url': _MP4_TMPL % (relinker_url, q),
+                'protocol': 'https',
+                'ext': 'mp4',
+            }
+            fmt.update(get_format_info(q))
+            formats.append(fmt)
+        return formats
+
     @staticmethod
     def _extract_subtitles(url, video_data):
         STL_EXT = 'stl'
@@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # 1080p direct mp4 url
+        'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
+        'md5': '2e501e8651d72f05ffe8f5d286ad560b',
+        'info_dict': {
+            'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
+            'ext': 'mp4',
+            'title': 'Leonardo - S1E1',
+            'alt_title': 'St 1 Ep 1 - Episodio 1',
+            'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Rai 1',
+            'duration': 3229,
+            'series': 'Leonardo',
+            'season': 'Season 1',
+        },
     }, {
         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
         'only_matching': True,
@@ -318,7 +423,7 @@ class RaiIE(RaiBaseIE):
     }, {
         # with ContentItem in og:url
         'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
-        'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
+        'md5': '06345bd97c932f19ffb129973d07a020',
         'info_dict': {
             'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
             'ext': 'mp4',

From 871645a4a4a0e12ec8f7bf78a3ad7bf75838ee5c Mon Sep 17 00:00:00 2001
From: nixxo <nixxo@protonmail.com>
Date: Sat, 2 Apr 2022 07:57:56 +0200
Subject: [PATCH 074/312] [RAI] Fix extraction of http formats

From https://github.com/yt-dlp/yt-dlp/pull/3272
Closes https://github.com/yt-dlp/yt-dlp/issues/3270
Authored by: nixxo
---
 youtube_dl/extractor/rai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 7b0315a62..563d3400f 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -106,7 +106,7 @@ class RaiBaseIE(InfoExtractor):
         }.items() if v is not None)
 
     def _create_http_urls(self, relinker_url, fmts):
-        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {
             # tbr: w, h

From b764dbe7730bc5b0a4f30f4f89fd85e096d0c4a0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 10 Apr 2022 05:49:09 +0100
Subject: [PATCH 075/312] Disable blank issues

---
 .github/ISSUE_TEMPLATE/config.yml | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..3ba13e0ce
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false

From a0068bd6bec16008bda7a39caecccbf84881c603 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 15 Apr 2022 16:07:09 +0100
Subject: [PATCH 076/312] [Youtube] Fix "n" descrambling for player fae06c11

Resolves #30856.
---
 youtube_dl/extractor/youtube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 41695a561..ff6c7b0f8 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
     # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
     def _extract_n_function_name(self, jscode):
-        target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
+        target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
         nfunc_and_idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
+            r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
             jscode, 'Initial JS player n function name')
         nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
         if not idx:
             return nfunc
         return self._parse_json(self._search_regex(
-            r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
+            r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
             'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):

From ebc627847cd1f5faddf4bd90376c1635777283cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81rni=20Dagur?= <arni@dagur.eu>
Date: Thu, 28 Apr 2022 11:18:10 +0200
Subject: [PATCH 077/312] [KTH] Add new extractor for KTH play (#30885)

* Implement extractor for KTH play
* Make KTH Play url regex more relaxed
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/kaltura.py    |  2 +-
 youtube_dl/extractor/kth.py        | 31 ++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 youtube_dl/extractor/kth.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 535080d0a..452caeade 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -557,6 +557,7 @@ from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
 from .konserthusetplay import KonserthusetPlayIE
 from .krasview import KrasViewIE
+from .kth import KTHIE
 from .ku6 import Ku6IE
 from .kusi import KUSIIE
 from .kuwo import (
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index c731612c4..6d4d93394 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
             'duration': info.get('duration'),
             'timestamp': info.get('createdAt'),
             'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
-            'view_count': info.get('plays'),
+            'view_count': int_or_none(info.get('plays')),
         }
diff --git a/youtube_dl/extractor/kth.py b/youtube_dl/extractor/kth.py
new file mode 100644
index 000000000..b8db461f5
--- /dev/null
+++ b/youtube_dl/extractor/kth.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KTHIE(InfoExtractor):
+    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
+    _TEST = {
+        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
+        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
+        'info_dict': {
+            'id': '0_uoop6oz9',
+            'ext': 'mp4',
+            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
+            'thumbnail': 're:https?://.+/thumbnail/.+',
+            'duration': 3516,
+            'timestamp': 1647345358,
+            'upload_date': '20220315',
+            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        result = self.url_result(
+            smuggle_url('kaltura:308:%s' % video_id, {
+                'service_url': 'https://api.kaltura.nordu.net'}),
+            'Kaltura')
+        return result

From e27d8d819fa69d5714ea1682a1d5d56f617461fc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 29 Apr 2022 13:36:02 +0100
Subject: [PATCH 078/312] [streamcz] Remove empty `'{}'.format()` for Py2.6

Use `'-join()'` here, or `{0}`, ..., in general.
---
 youtube_dl/extractor/streamcz.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index 060ba32e0..97b2eb7f8 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
                 if not stream.get('url'):
                     continue
                 yield merge_dicts({
-                    'format_id': '{}-{}'.format(format_id, ext),
+                    'format_id': '-'.join((format_id, ext)),
                     'ext': ext,
                     'source_preference': pref,
                     'url': urljoin(spl_url, stream['url']),

From e988fa4523e489596a2a27c4d45275e44db49406 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 28 Apr 2022 15:25:49 +0100
Subject: [PATCH 079/312] [doc] Clarify test naming

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2841ed68f..cd888c731 100644
--- a/README.md
+++ b/README.md
@@ -1069,9 +1069,11 @@ After you have ensured this site is distributing its content legally, you can fo
             }
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
+    * the test names use the extractor class name **without the trailing `IE`**
+    * tests with `only_matching` key in test's dict are not counted.
+8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
 
         $ flake8 youtube_dl/extractor/yourextractor.py
 

From c7965b9fc2cae54f244f31f5373cb81a40e822ab Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 9 May 2022 18:54:41 +0100
Subject: [PATCH 080/312] [NHK] Support alphabetic characters in 7-char NhkVod
 IDs (#29682)

---
 youtube_dl/extractor/nhk.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py
index 8a9331a79..46a800e7e 100644
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
     def _extract_episode_info(self, url, episode=None):
         fetch_episode = episode is None
         lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
-        if episode_id.isdigit():
+        if len(episode_id) == 7:
             episode_id = episode_id[:4] + '-' + episode_id[4:]
 
         is_video = m_type == 'video'
@@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
 
 
 class NhkVodIE(NhkBaseIE):
-    _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
+    _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
     # Content available only for a limited period of time. Visit
     # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
     _TESTS = [{
@@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
     }, {
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
         'only_matching': True,
+    }, {
+        # video, alphabetic character in ID #29670
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
+        'only_matching': True,
+        'info_dict': {
+            'id': 'qfjay6cg',
+            'ext': 'mp4',
+            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
+            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
+            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
+            'upload_date': '20210615',
+            'timestamp': 1623722008,
+        }
     }]
 
     def _real_extract(self, url):

From c3deca86aedd2d8ab7cd0c596fd68b7aeb7c042d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 19 May 2022 17:41:48 +0000
Subject: [PATCH 081/312] [wat.tv] Add version `pver` to metadata API call

Resolves #30959.
---
 youtube_dl/extractor/wat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py
index f1bccc2d6..b15e03768 100644
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -57,7 +57,7 @@ class WatIE(InfoExtractor):
         #     'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
         video_data = self._download_json(
             'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
-            video_id, query={'context': 'MYTF1'})
+            video_id, query={'context': 'MYTF1', 'pver': '4001000'})
         video_info = video_data['media']
 
         error_desc = video_info.get('error_desc')

From be35e5343a6c31f5f32ee216ab4486a1992260c5 Mon Sep 17 00:00:00 2001
From: Jacob Chapman <7908073+chapmanjacobd@users.noreply.github.com>
Date: Wed, 13 Apr 2022 07:21:23 -0500
Subject: [PATCH 082/312] Update options.py

---
 youtube_dl/options.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 0a0641bd4..6521ad881 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
-        help='Download only matching titles (regex or caseless sub-string)')
+        help='Download only matching titles (case-insensitive regex or sub-string)')
     selection.add_option(
         '--reject-title',
         dest='rejecttitle', metavar='REGEX',
-        help='Skip download for matching titles (regex or caseless sub-string)')
+        help='Skip download for matching titles (case-insensitive regex or sub-string)')
     selection.add_option(
         '--max-downloads',
         dest='max_downloads', metavar='NUMBER', type=int, default=None,

From 187a48aee29847664e0c4cd80fe90c32e1fb334b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 24 May 2022 15:33:00 +0100
Subject: [PATCH 083/312] [YouTube] Handle player c5a4daa1 with indirect
 n-function definition

* resolves #30976
---
 youtube_dl/extractor/youtube.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ff6c7b0f8..9c62b8890 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1471,9 +1471,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
         if not idx:
             return nfunc
+        if int_or_none(idx) == 0:
+            real_nfunc = self._search_regex(
+                r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
+                'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals()))
+            if real_nfunc:
+                return real_nfunc
         return self._parse_json(self._search_regex(
             r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
-            'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
+            'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)
@@ -1482,7 +1488,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if func_code:
             jsi = JSInterpreter(func_code)
         else:
-            player_id = self._extract_player_info(player_url)
             jscode = self._get_player_code(video_id, player_url, player_id)
             funcname = self._extract_n_function_name(jscode)
             jsi = JSInterpreter(jscode)

From 52c3751df722ab6f31f0229a415c7389a95c2307 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 28 May 2022 13:52:51 +0100
Subject: [PATCH 084/312] [utils] Enable ALPN in HTTPS to satisfy broken
 servers

See https://github.com/yt-dlp/yt-dlp/issues/3878
---
 youtube_dl/utils.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e722eed58..4ff27db3d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2292,12 +2292,30 @@ def formatSeconds(secs):
 
 
 def make_HTTPS_handler(params, **kwargs):
+
+    # https://www.rfc-editor.org/info/rfc7301
+    ALPN_PROTOCOLS = ['http/1.1']
+
+    def set_alpn_protocols(ctx):
+        # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
+        # Thanks @coletdjnz
+        # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
+        # https://github.com/python/cpython/issues/85140
+        # https://github.com/yt-dlp/yt-dlp/issues/3878
+        try:
+            ctx.set_alpn_protocols(ALPN_PROTOCOLS)
+        except (AttributeError, NotImplementedError):
+            # Python < 2.7.10, not ssl.HAS_ALPN
+            pass
+
     opts_no_check_certificate = params.get('nocheckcertificate', False)
     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+        set_alpn_protocols(context)
         if opts_no_check_certificate:
             context.check_hostname = False
             context.verify_mode = ssl.CERT_NONE
+
         try:
             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
         except TypeError:
@@ -2313,6 +2331,7 @@ def make_HTTPS_handler(params, **kwargs):
                                if opts_no_check_certificate
                                else ssl.CERT_REQUIRED)
         context.set_default_verify_paths()
+        set_alpn_protocols(context)
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 

From 04fd3289d30de3c99c7d2de34d555b050bc96d4d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 28 May 2022 13:54:32 +0100
Subject: [PATCH 085/312] [YouPorn] Improve `upload_date` extraction

See https://github.com/yt-dlp/yt-dlp/issues/2701#issuecomment-1034341883
---
 youtube_dl/extractor/youporn.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index 7084d3d12..31e8abb72 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -137,9 +138,10 @@ class YouPornIE(InfoExtractor):
             r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
             webpage, 'uploader', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
-            [r'UPLOADED:\s*<span>([^<]+)',
+            (r'UPLOADED:\s*<span>([^<]+)',
              r'Date\s+[Aa]dded:\s*<span>([^<]+)',
-             r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+             r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+             r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
             webpage, 'upload date', fatal=False))
 
         age_limit = self._rta_search(webpage)

From 9aa8e5340f3d5ece372b983f8e399277ca1f1fe4 Mon Sep 17 00:00:00 2001
From: LewdyCoder <88900506+LewdyCoder@users.noreply.github.com>
Date: Mon, 30 May 2022 03:50:50 +0200
Subject: [PATCH 086/312] [Readme] Clarified extractor naming (#29799)

* Exported usable extractors must be named `xxxxIE`

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 58ab3a4b8..ff40cef78 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -150,7 +150,7 @@ After you have ensured this site is distributing its content legally, you can fo
                 # TODO more properties (see youtube_dl/extractor/common.py)
             }
     ```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). This makes the extractor available for use, as long as the class ends with `IE`.
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):

From 1baa0f5f6678c047624785dc9a3ab3cb44a72809 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 29 Apr 2021 04:56:09 +0530
Subject: [PATCH 087/312] [utils] Escape URL while sanitizing Closes #31008,
 #yt-dlp/263

While this fixes the issue in question, it does not try to address the root-cause of the problem
Refer: 915f911e365736227e134ad654601443dbfd7ccb, f5fa042c82300218a2d07b95dd6b9c0756745db3
---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4ff27db3d..8aa2a43a2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2151,7 +2151,7 @@ def sanitize_url(url):
     for mistake, fixup in COMMON_TYPOS:
         if re.match(mistake, url):
             return re.sub(mistake, fixup, url)
-    return url
+    return escape_url(url)
 
 
 def sanitized_Request(url, *args, **kwargs):

From 530f4582d011cd94986cf4d233f9fb9263f72150 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Jun 2022 19:29:48 +0100
Subject: [PATCH 088/312] [HRFernsehen] Back-port new extractor from yt-dlp
 Closes #26445, where this was originally proposed.

---
 youtube_dl/extractor/extractors.py  |   1 +
 youtube_dl/extractor/hrfernsehen.py | 101 ++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/hrfernsehen.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 452caeade..751fc38b6 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -479,6 +479,7 @@ from .hotstar import (
 )
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
+from .hrfernsehen import HRFernsehenIE
 from .hrti import (
     HRTiIE,
     HRTiPlaylistIE,
diff --git a/youtube_dl/extractor/hrfernsehen.py b/youtube_dl/extractor/hrfernsehen.py
new file mode 100644
index 000000000..11b879dbd
--- /dev/null
+++ b/youtube_dl/extractor/hrfernsehen.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import json
+import re
+
+from ..utils import (
+    int_or_none,
+    unified_timestamp,
+    unescapeHTML
+)
+from .common import InfoExtractor
+
+
+class HRFernsehenIE(InfoExtractor):
+    IE_NAME = 'hrfernsehen'
+    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+
+    _TESTS = [{
+        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+        'md5': '5c4e0ba94677c516a2f65a84110fc536',
+        'info_dict': {
+            'id': '130546',
+            'ext': 'mp4',
+            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+            'subtitles': {'de': [{
+                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+            }]},
+            'timestamp': 1598470200,
+            'upload_date': '20200826',
+            'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
+            'title': 'hessenschau vom 26.08.2020'
+        }
+    }, {
+        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+        'only_matching': True
+    }]
+
+    _GEO_COUNTRIES = ['DE']
+
+    def extract_airdate(self, loader_data):
+        airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
+
+        if airdate_str is None:
+            return None
+
+        return unified_timestamp(airdate_str)
+
+    def extract_formats(self, loader_data):
+        stream_formats = []
+        for stream_obj in loader_data["videoResolutionLevels"]:
+            stream_format = {
+                'format_id': str(stream_obj['verticalResolution']) + "p",
+                'height': stream_obj['verticalResolution'],
+                'url': stream_obj['url'],
+            }
+
+            quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+                                            stream_obj['url'])
+            if quality_information:
+                stream_format['width'] = int_or_none(quality_information.group(1))
+                stream_format['height'] = int_or_none(quality_information.group(2))
+                stream_format['fps'] = int_or_none(quality_information.group(3))
+                stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+            stream_formats.append(stream_format)
+
+        self._sort_formats(stream_formats)
+        return stream_formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            ['og:title', 'twitter:title', 'name'], webpage)
+        description = self._html_search_meta(
+            ['description'], webpage)
+
+        loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
+        loader_data = json.loads(loader_str)
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': self.extract_formats(loader_data),
+            'timestamp': self.extract_airdate(loader_data)
+        }
+
+        if "subtitle" in loader_data:
+            info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
+
+        thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
+        if len(thumbnails) > 0:
+            info["thumbnails"] = [{"url": t} for t in thumbnails]
+
+        return info

From ef044be34bb64c489558dd07818616b514d2e2ad Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 8 Jun 2022 15:52:21 +0100
Subject: [PATCH 089/312] [test] Skip not _WORKING IE in subtitle tests; use
 unittest.skipTest throughout

---
 test/test_download.py  | 7 +++----
 test/test_subtitles.py | 3 +++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/test_download.py b/test/test_download.py
index 8e43cfa12..0951a171a 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -88,7 +88,6 @@ class TestDownload(unittest.TestCase):
 
 # Dynamically generate tests
 
-
 def generator(test_case, tname):
 
     def test_template(self):
@@ -100,9 +99,10 @@ def generator(test_case, tname):
 
         def print_skipping(reason):
             print('Skipping %s: %s' % (test_case['name'], reason))
+            self.skipTest(reason)
+
         if not ie.working():
             print_skipping('IE marked as not _WORKING')
-            return
 
         for tc in test_cases:
             info_dict = tc.get('info_dict', {})
@@ -111,11 +111,10 @@ def generator(test_case, tname):
 
         if 'skip' in test_case:
             print_skipping(test_case['skip'])
-            return
+
         for other_ie in other_ies:
             if not other_ie.working():
                 print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
-                return
 
         params = get_params(test_case.get('params', {}))
         params['outtmpl'] = tname + '_' + params['outtmpl']
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 550e0ca00..c250473be 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
         self.DL = FakeYDL()
         self.ie = self.IE()
         self.DL.add_info_extractor(self.ie)
+        if not self.IE.working():
+            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
+            self.skipTest('IE marked as not _WORKING')
 
     def getInfoDict(self):
         info_dict = self.DL.extract_info(self.url, download=False)

From 3aa94d7945dfaa0e04acf2700ffe0e43b00db498 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 8 Jun 2022 23:11:33 +0100
Subject: [PATCH 090/312] [test] Fix workable subtitle tests (except YT) and
 mark others as skip, broken

* broken tests need to be fixed when fixing the respective IE
---
 test/test_subtitles.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index c250473be..23cf06e09 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -131,6 +131,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
         self.assertFalse(subtitles)
 
 
+@unittest.skip('IE broken')
 class TestTedSubtitles(BaseTestSubtitles):
     url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
     IE = TEDIE
@@ -155,18 +156,19 @@ class TestVimeoSubtitles(BaseTestSubtitles):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
-        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+        self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
+        self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
 
     def test_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
-        self.url = 'http://vimeo.com/56015672'
+        self.url = 'http://vimeo.com/68093876'
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertFalse(subtitles)
 
 
+@unittest.skip('IE broken')
 class TestWallaSubtitles(BaseTestSubtitles):
     url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
     IE = WallaIE
@@ -188,6 +190,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.assertFalse(subtitles)
 
 
+@unittest.skip('IE broken')
 class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
     url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
     IE = CeskaTelevizeIE
@@ -209,6 +212,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
         self.assertFalse(subtitles)
 
 
+@unittest.skip('IE broken')
 class TestLyndaSubtitles(BaseTestSubtitles):
     url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
     IE = LyndaIE
@@ -221,6 +225,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
 
 
+@unittest.skip('IE broken')
 class TestNPOSubtitles(BaseTestSubtitles):
     url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
     IE = NPOIE
@@ -233,6 +238,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
 
 
+@unittest.skip('IE broken')
 class TestMTVSubtitles(BaseTestSubtitles):
     url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
     IE = ComedyCentralIE
@@ -256,8 +262,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['no']))
-        self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+        self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
+        self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
 
 
 class TestRaiPlaySubtitles(BaseTestSubtitles):
@@ -280,6 +286,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
 
 
+@unittest.skip('IE broken - DRM only')
 class TestVikiSubtitles(BaseTestSubtitles):
     url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
     IE = VikiIE
@@ -306,6 +313,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+@unittest.skip('IE broken')
 class TestThePlatformFeedSubtitles(BaseTestSubtitles):
     url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
     IE = ThePlatformFeedIE
@@ -341,7 +349,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
 
     def test_subtitles_in_page(self):
         self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@@ -349,7 +357,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+        self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
 
 
 if __name__ == '__main__':

From 811c480f7b6c25ca510a033e6365d00174135392 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 9 Jun 2022 15:25:23 +0100
Subject: [PATCH 091/312] [YouTube] Support JSON3 subtitle format * subtitle
 tests updated to match

---
 test/test_subtitles.py          | 74 ++++++++++++++++++++++++---------
 youtube_dl/extractor/youtube.py |  2 +-
 2 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 23cf06e09..4cbc69ccd 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -59,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase):
 
 
 class TestYoutubeSubtitles(BaseTestSubtitles):
+    # Available subtitles for QRS8MkLhQmM:
+    # Language formats
+    # ru       vtt, ttml, srv3, srv2, srv1, json3
+    # fr       vtt, ttml, srv3, srv2, srv1, json3
+    # en       vtt, ttml, srv3, srv2, srv1, json3
+    # nl       vtt, ttml, srv3, srv2, srv1, json3
+    # de       vtt, ttml, srv3, srv2, srv1, json3
+    # ko       vtt, ttml, srv3, srv2, srv1, json3
+    # it       vtt, ttml, srv3, srv2, srv1, json3
+    # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3
+    # hi       vtt, ttml, srv3, srv2, srv1, json3
+    # pt-BR    vtt, ttml, srv3, srv2, srv1, json3
+    # es-MX    vtt, ttml, srv3, srv2, srv1, json3
+    # ja       vtt, ttml, srv3, srv2, srv1, json3
+    # pl       vtt, ttml, srv3, srv2, srv1, json3
     url = 'QRS8MkLhQmM'
     IE = YoutubeIE
 
@@ -67,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
-        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
-        self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
+        self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+        self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
         for lang in ['fr', 'de']:
             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
-    def test_youtube_subtitles_ttml_format(self):
+    def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
         self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'ttml'
+        self.DL.params['subtitlesformat'] = fmt
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
+        self.assertEqual(md5(subtitles[lang]), md5_hash)
+
+    def test_youtube_subtitles_ttml_format(self):
+        self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
 
     def test_youtube_subtitles_vtt_format(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitlesformat'] = 'vtt'
+        self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
+
+    def test_youtube_subtitles_json3_format(self):
+        self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
+
+    def _test_automatic_captions(self, url, lang):
+        self.url = url
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = [lang]
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+        self.assertTrue(subtitles[lang] is not None)
 
     def test_youtube_automatic_captions(self):
-        self.url = '8YoUxe5ncPo'
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertTrue(subtitles['it'] is not None)
+        # Available automatic captions for 8YoUxe5ncPo:
+        # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
+        # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
+        # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
+        # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
+        # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
+        # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
+        # mt, ms, mr, ug, ta, my, af, sw, is, am, 
+        #                                         *it*, iw, sv, ar,
+        # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
+        # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
+        # ky, sd
+        # ...
+        self._test_automatic_captions('8YoUxe5ncPo', 'it')
 
+    @unittest.skip('ASR subs all in all supported langs now')
     def test_youtube_translated_subtitles(self):
-        # This video has a subtitles track, which can be translated
-        self.url = 'Ky9eprVWzlI'
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertTrue(subtitles['it'] is not None)
+        # This video has a subtitles track, which can be translated (#4555)
+        self._test_automatic_captions('Ky9eprVWzlI', 'it')
 
     def test_youtube_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
-        self.url = 'n5BB19UTcdA'
+        # Available automatic captions for 8YoUxe5ncPo:
+        # ...
+        # 8YoUxe5ncPo has no subtitles
+        self.url = '8YoUxe5ncPo'
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9c62b8890..91a3b6058 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -499,7 +499,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
     )
-    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 
     _GEO_BYPASS = False
 

From 0700fde6403aa9eec1ff02bff7323696a205900c Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan@gmail.com>
Date: Sat, 9 Jan 2021 17:56:12 +0530
Subject: [PATCH 092/312] [utils, etc] Kill child processes when yt-dl is
 killed

* derived from PR #26592, closes #26592

Authored by: Unrud
---
 youtube_dl/YoutubeDL.py                    |  3 ++-
 youtube_dl/compat.py                       |  3 ++-
 youtube_dl/downloader/external.py          | 16 ++++++++++------
 youtube_dl/downloader/rtmp.py              | 10 ++++++----
 youtube_dl/extractor/openload.py           |  3 ++-
 youtube_dl/postprocessor/embedthumbnail.py |  5 +++--
 youtube_dl/postprocessor/ffmpeg.py         |  5 +++--
 youtube_dl/utils.py                        | 18 ++++++++++++++----
 8 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 019e309cb..3895b408f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -73,6 +73,7 @@ from .utils import (
     PostProcessingError,
     preferredencoding,
     prepend_extension,
+    process_communicate_or_kill,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2323,7 +2324,7 @@ class YoutubeDL(object):
                 ['git', 'rev-parse', '--short', 'HEAD'],
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate()
+            out, err = process_communicate_or_kill(sp)
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 2004a405a..9f5f85dae 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2890,6 +2890,7 @@ else:
     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 
     def compat_get_terminal_size(fallback=(80, 24)):
+        from .utils import process_communicate_or_kill
         columns = compat_getenv('COLUMNS')
         if columns:
             columns = int(columns)
@@ -2906,7 +2907,7 @@ else:
                 sp = subprocess.Popen(
                     ['stty', 'size'],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                out, err = sp.communicate()
+                out, err = process_communicate_or_kill(sp)
                 _lines, _columns = map(int, out.split())
             except Exception:
                 _columns, _lines = _terminal_size(*fallback)
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index c31f8910a..a06ab2e50 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -22,6 +22,7 @@ from ..utils import (
     handle_youtubedl_headers,
     check_executable,
     is_outdated_version,
+    process_communicate_or_kill,
 )
 
 
@@ -104,7 +105,7 @@ class ExternalFD(FileDownloader):
 
         p = subprocess.Popen(
             cmd, stderr=subprocess.PIPE)
-        _, stderr = p.communicate()
+        _, stderr = process_communicate_or_kill(p)
         if p.returncode != 0:
             self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode
@@ -141,7 +142,7 @@ class CurlFD(ExternalFD):
 
         # curl writes the progress to stderr so don't capture it.
         p = subprocess.Popen(cmd)
-        p.communicate()
+        process_communicate_or_kill(p)
         return p.returncode
 
 
@@ -336,14 +337,17 @@ class FFmpegFD(ExternalFD):
         proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
         try:
             retval = proc.wait()
-        except KeyboardInterrupt:
-            # subprocces.run would send the SIGKILL signal to ffmpeg and the
+        except BaseException as e:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
             # mp4 file couldn't be played, but if we ask ffmpeg to quit it
             # produces a file that is playable (this is mostly useful for live
             # streams). Note that Windows is not affected and produces playable
             # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
-            if sys.platform != 'win32':
-                proc.communicate(b'q')
+            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
+                process_communicate_or_kill(proc, b'q')
+            else:
+                proc.kill()
+                proc.wait()
             raise
         return retval
 
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index fbb7f51b0..8a25dbc8d 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -89,11 +89,13 @@ class RtmpFD(FileDownloader):
                                 self.to_screen('')
                             cursor_in_new_line = True
                             self.to_screen('[rtmpdump] ' + line)
-            finally:
+                if not cursor_in_new_line:
+                    self.to_screen('')
+                return proc.wait()
+            except BaseException:  # Including KeyboardInterrupt
+                proc.kill()
                 proc.wait()
-            if not cursor_in_new_line:
-                self.to_screen('')
-            return proc.returncode
+                raise
 
         url = info_dict['url']
         player_url = info_dict.get('player_url')
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index 0c20d0177..b05d60435 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -16,6 +16,7 @@ from ..utils import (
     ExtractorError,
     get_exe_version,
     is_outdated_version,
+    process_communicate_or_kill,
     std_headers,
 )
 
@@ -226,7 +227,7 @@ class PhantomJSwrapper(object):
             self.exe, '--ssl-protocol=any',
             self._TMP_FILES['script'].name
         ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        out, err = p.communicate()
+        out, err = process_communicate_or_kill(p)
         if p.returncode != 0:
             raise ExtractorError(
                 'Executing JS failed\n:' + encodeArgument(err))
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 3990908b6..5e7b6e2df 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -13,8 +13,9 @@ from ..utils import (
     encodeFilename,
     PostProcessingError,
     prepend_extension,
+    process_communicate_or_kill,
     replace_extension,
-    shell_quote
+    shell_quote,
 )
 
 
@@ -109,7 +110,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
                 self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
 
             p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            stdout, stderr = p.communicate()
+            stdout, stderr = process_communicate_or_kill(p)
 
             if p.returncode != 0:
                 msg = stderr.decode('utf-8', 'replace').strip()
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 9f76c9d4e..8c29c8d59 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -16,6 +16,7 @@ from ..utils import (
     is_outdated_version,
     PostProcessingError,
     prepend_extension,
+    process_communicate_or_kill,
     shell_quote,
     subtitles_filename,
     dfxp2srt,
@@ -180,7 +181,7 @@ class FFmpegPostProcessor(PostProcessor):
             handle = subprocess.Popen(
                 cmd, stderr=subprocess.PIPE,
                 stdout=subprocess.PIPE, stdin=subprocess.PIPE)
-            stdout_data, stderr_data = handle.communicate()
+            stdout_data, stderr_data = process_communicate_or_kill(handle)
             expected_ret = 0 if self.probe_available else 1
             if handle.wait() != expected_ret:
                 return None
@@ -228,7 +229,7 @@ class FFmpegPostProcessor(PostProcessor):
         if self._downloader.params.get('verbose', False):
             self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
-        stdout, stderr = p.communicate()
+        stdout, stderr = process_communicate_or_kill(p)
         if p.returncode != 0:
             stderr = stderr.decode('utf-8', 'replace')
             msgs = stderr.strip().split('\n')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 8aa2a43a2..4e00317f1 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2212,6 +2212,15 @@ def unescapeHTML(s):
         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
+def process_communicate_or_kill(p, *args, **kwargs):
+    try:
+        return p.communicate(*args, **kwargs)
+    except BaseException:  # Including KeyboardInterrupt
+        p.kill()
+        p.wait()
+        raise
+
+
 def get_subprocess_encoding():
     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
         # For subprocess calls, encode with locale encoding
@@ -3788,7 +3797,8 @@ def check_executable(exe, args=[]):
     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
     args can be a list of arguments for a short output (like -version) """
     try:
-        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+        process_communicate_or_kill(subprocess.Popen(
+            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
     except OSError:
         return False
     return exe
@@ -3802,10 +3812,10 @@ def get_exe_version(exe, args=['--version'],
         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
         # SIGTTOU if youtube-dl is run in the background.
         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
-        out, _ = subprocess.Popen(
+        out, _ = process_communicate_or_kill(subprocess.Popen(
             [encodeArgument(exe)] + args,
             stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
     except OSError:
         return False
     if isinstance(out, bytes):  # Python 2.x
@@ -5744,7 +5754,7 @@ def write_xattr(path, key, value):
                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                 except EnvironmentError as e:
                     raise XAttrMetadataError(e.errno, e.strerror)
-                stdout, stderr = p.communicate()
+                stdout, stderr = process_communicate_or_kill(p)
                 stderr = stderr.decode('utf-8', 'replace')
                 if p.returncode != 0:
                     raise XAttrMetadataError(p.returncode, stderr)

From cc179df346abf34c8f77dbb221b839092007f20c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 12 Jun 2022 14:10:38 +0100
Subject: [PATCH 093/312] [XHamster] Support xhday.com alias, extract
 `uploader_id` * support xhday.com alias for xhamster.com (resolves #31023)  
 Authored by: dirkf * extract `uploader_id`:   from
 https://github.com/yt-dlp/yt-dlp/commit/908b56eaf7872149706dbd7fa071f838d0c786b7
   (PR https://github.com/yt-dlp/yt-dlp/pull/844)   Authored by: octotherp

---
 youtube_dl/extractor/xhamster.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index f73b9778f..f764021ba 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
@@ -23,7 +24,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -34,7 +35,7 @@ class XHamsterIE(InfoExtractor):
                     ''' % _DOMAINS
     _TESTS = [{
         'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
-        'md5': '98b4687efb1ffd331c4197854dc09e8f',
+        'md5': '34e1ab926db5dc2750fed9e1f34304bb',
         'info_dict': {
             'id': '1509445',
             'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -43,6 +44,7 @@ class XHamsterIE(InfoExtractor):
             'timestamp': 1350194821,
             'upload_date': '20121014',
             'uploader': 'Ruseful2011',
+            'uploader_id': 'ruseful2011',
             'duration': 893,
             'age_limit': 18,
         },
@@ -72,6 +74,7 @@ class XHamsterIE(InfoExtractor):
             'timestamp': 1454948101,
             'upload_date': '20160208',
             'uploader': 'parejafree',
+            'uploader_id': 'parejafree',
             'duration': 72,
             'age_limit': 18,
         },
@@ -117,6 +120,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
         'only_matching': True,
+    }, {
+        'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -245,6 +251,7 @@ class XHamsterIE(InfoExtractor):
             else:
                 categories = None
 
+            uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
             return {
                 'id': video_id,
                 'display_id': display_id,
@@ -253,6 +260,8 @@ class XHamsterIE(InfoExtractor):
                 'timestamp': int_or_none(video.get('created')),
                 'uploader': try_get(
                     video, lambda x: x['author']['name'], compat_str),
+                'uploader_url': uploader_url,
+                'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
                 'thumbnail': video.get('thumbURL'),
                 'duration': int_or_none(video.get('duration')),
                 'view_count': int_or_none(video.get('views')),
@@ -261,7 +270,7 @@ class XHamsterIE(InfoExtractor):
                 'dislike_count': int_or_none(try_get(
                     video, lambda x: x['rating']['dislikes'], int)),
                 'comment_count': int_or_none(video.get('views')),
-                'age_limit': age_limit,
+                'age_limit': age_limit if age_limit is not None else 18,
                 'categories': categories,
                 'formats': formats,
             }
@@ -352,6 +361,7 @@ class XHamsterIE(InfoExtractor):
             'description': description,
             'upload_date': upload_date,
             'uploader': uploader,
+            'uploader_id': uploader.lower() if uploader else None,
             'thumbnail': thumbnail,
             'duration': duration,
             'view_count': view_count,
@@ -420,6 +430,9 @@ class XHamsterUserIE(InfoExtractor):
             'id': 'firatkaan',
         },
         'playlist_mincount': 1,
+    }, {
+        'url': 'https://xhday.com/users/mobhunter',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

From 11665dd2367a2eefd1ad090828f987fef11226e4 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 15 Jun 2022 18:26:54 +0100
Subject: [PATCH 094/312] [test] Fix linter for
 3aa94d7945dfaa0e04acf2700ffe0e43b00db498

---
 test/test_download.py  | 1 +
 test/test_subtitles.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_download.py b/test/test_download.py
index 0951a171a..6a6673bc2 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -88,6 +88,7 @@ class TestDownload(unittest.TestCase):
 
 # Dynamically generate tests
 
+
 def generator(test_case, tname):
 
     def test_template(self):
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 4cbc69ccd..1197721ff 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -117,7 +117,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
         # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
         # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
-        # mt, ms, mr, ug, ta, my, af, sw, is, am, 
+        # mt, ms, mr, ug, ta, my, af, sw, is, am,
         #                                         *it*, iw, sv, ar,
         # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
         # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,

From 8a158a936c8b002ef536e9e2b778ded02c09c0fa Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Jun 2022 19:45:34 +0100
Subject: [PATCH 095/312] [NHK] Use new API URL

---
 youtube_dl/extractor/nhk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py
index 46a800e7e..f43d91cd5 100644
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -8,7 +8,7 @@ from ..utils import urljoin
 
 
 class NhkBaseIE(InfoExtractor):
-    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+    _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
     _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
     _TYPE_REGEX = r'/(?P<type>video|audio)/'
 

From a03b9775d544b06a5b4f2aa630214c7c22fc2229 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 26 Jun 2022 14:18:33 +0100
Subject: [PATCH 096/312] [Mediaset] Support player version number in URL
 pattern

Ref: https://github.com/yt-dlp/yt-dlp/issues/4141
---
 youtube_dl/extractor/mediaset.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
index 2c16fc9e2..20048c6ab 100644
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -24,7 +24,7 @@ class MediasetIE(ThePlatformBaseIE):
                             (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
                             (?:
                                 (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
-                                player/index\.html\?.*?\bprogramGuid=
+                                player(?:/v\d+)?/index\.html\?.*?\bprogramGuid=
                             )
                     )(?P<id>[0-9A-Z]{16,})
                     '''
@@ -73,6 +73,10 @@ class MediasetIE(ThePlatformBaseIE):
         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
         'only_matching': True,
+    }, {
+        # embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
+        'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
+        'only_matching': True,
     }, {
         'url': 'mediaset:FAFU000000665924',
         'only_matching': True,

From 090acd58c1d810fbef1bac08d70bbfad9c0a7504 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Jul 2022 20:05:21 +0100
Subject: [PATCH 097/312] [options] Improve be35e53 (--match-/reject-title
 parameter value)

Resolves #31064.
---
 youtube_dl/options.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 6521ad881..f6621ef91 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
-        help='Download only matching titles (case-insensitive regex or sub-string)')
+        help='Download only matching titles (case-insensitive regex or alphanumeric sub-string)')
     selection.add_option(
         '--reject-title',
         dest='rejecttitle', metavar='REGEX',
-        help='Skip download for matching titles (case-insensitive regex or sub-string)')
+        help='Skip download for matching titles (case-insensitive regex or alphanumeric sub-string)')
     selection.add_option(
         '--max-downloads',
         dest='max_downloads', metavar='NUMBER', type=int, default=None,

From 5f5c127ece74e52aa5b49b6d2941cc0f848d3c36 Mon Sep 17 00:00:00 2001
From: Kyraminol Endyeran <kyraminari@gmail.com>
Date: Tue, 12 Jul 2022 01:35:40 +0200
Subject: [PATCH 098/312] [VVVVID] Support video/dash types (#31060)

Resolves #31030.
---
 youtube_dl/extractor/vvvvid.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py
index bc196f8a0..6a0d4e8f0 100644
--- a/youtube_dl/extractor/vvvvid.py
+++ b/youtube_dl/extractor/vvvvid.py
@@ -64,6 +64,18 @@ class VVVVIDIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # video_type == 'video/dash'
+        'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
+        'info_dict': {
+            'id': '693786',
+            'ext': 'mp4',
+            'title': 'Nanachi',
+        },
+        'params': {
+            'skip_download': True,
+            'format': 'mp4',
+        },
     }, {
         'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
         'only_matching': True
@@ -205,6 +217,9 @@ class VVVVIDIE(InfoExtractor):
                 })
                 is_youtube = True
                 break
+            elif video_type == 'video/dash':
+                formats.extend(self._extract_m3u8_formats(
+                    embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
             else:
                 formats.extend(self._extract_wowza_formats(
                     'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))

From adb5294177265ba35b45746dbb600965076ed150 Mon Sep 17 00:00:00 2001
From: Wes <morganw@gmail.com>
Date: Fri, 29 Jul 2022 20:10:00 -0500
Subject: [PATCH 099/312] [aenetworks] Update _THEPLATFORM_KEY and
 _THEPLATFORM_SECRET (#29749)

Fixes ytdl-org/youtube-dl#29300
---
 youtube_dl/extractor/aenetworks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index e55c03fd7..2a1f08e39 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
             fyi\.tv
         )/'''
-    _THEPLATFORM_KEY = 'crazyjava'
-    _THEPLATFORM_SECRET = 's3cr3t'
+    _THEPLATFORM_KEY = '43jXaGRQud'
+    _THEPLATFORM_SECRET = 'S10BPXHMlb'
     _DOMAIN_MAP = {
         'history.com': ('HISTORY', 'history'),
         'aetv.com': ('AETV', 'aetv'),

From deee741fb145360576ceae9d69b1b43db082c404 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 9 Aug 2022 21:05:00 +0100
Subject: [PATCH 100/312] [test, etc] Improve download test logs; also clean up
 some new flake8 issues (#31153)

* [test] Identify testcase errors better
* [test] Identify download errors better
* [extractor/minds] Linter
* [extractor/aes] Linter
---
 test/test_download.py         | 7 +++++--
 youtube_dl/aes.py             | 2 +-
 youtube_dl/extractor/minds.py | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/test_download.py b/test/test_download.py
index 6a6673bc2..19936969f 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -33,6 +33,7 @@ from youtube_dl.compat import (
 from youtube_dl.utils import (
     DownloadError,
     ExtractorError,
+    error_to_compat_str,
     format_bytes,
     UnavailableVideoError,
 )
@@ -108,7 +109,7 @@ def generator(test_case, tname):
         for tc in test_cases:
             info_dict = tc.get('info_dict', {})
             if not (info_dict.get('id') and info_dict.get('ext')):
-                raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+                raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, ))
 
         if 'skip' in test_case:
             print_skipping(test_case['skip'])
@@ -161,7 +162,9 @@ def generator(test_case, tname):
                 except (DownloadError, ExtractorError) as err:
                     # Check if the exception is not a network related one
                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
-                        raise
+                        msg = getattr(err, 'msg', error_to_compat_str(err))
+                        err.msg = '%s (%s)' % (msg, tname, )
+                        raise err
 
                     if try_num == RETRIES:
                         report_warning('%s failed due to network errors, skipping...' % tname)
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index 461bb6d41..d0de2d93f 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -303,7 +303,7 @@ def xor(data1, data2):
 
 
 def rijndael_mul(a, b):
-    if(a == 0 or b == 0):
+    if (a == 0 or b == 0):
         return 0
     return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
 
diff --git a/youtube_dl/extractor/minds.py b/youtube_dl/extractor/minds.py
index 8e9f0f825..e8fd582aa 100644
--- a/youtube_dl/extractor/minds.py
+++ b/youtube_dl/extractor/minds.py
@@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE):
             else:
                 return self.url_result(entity['perma_url'])
         else:
-            assert(entity['subtype'] == 'video')
+            assert (entity['subtype'] == 'video')
             video_id = entity_id
         # 1080p and webm formats available only on the sources array
         video = self._call_api(

From e6a836d54ca1d3cd02f3ee45ef707a46f23e8291 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 10 Aug 2022 15:37:59 +0100
Subject: [PATCH 101/312] [core] Make `--max-downloads ...` stop immediately on
 reaching the limit

Based on and closes #26638.
---
 youtube_dl/YoutubeDL.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 3895b408f..e77b8d50c 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1779,10 +1779,9 @@ class YoutubeDL(object):
 
         assert info_dict.get('_type', 'video') == 'video'
 
-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None:
-            if self._num_downloads >= int(max_downloads):
-                raise MaxDownloadsReached()
+        max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
+        if self._num_downloads >= max_downloads:
+            raise MaxDownloadsReached()
 
         # TODO: backward compatibility, to be removed
         info_dict['fulltitle'] = info_dict['title']
@@ -2062,6 +2061,9 @@ class YoutubeDL(object):
                     self.report_error('postprocessing: %s' % str(err))
                     return
                 self.record_download_archive(info_dict)
+                # avoid possible nugatory search for further items (PR #26638)
+                if self._num_downloads >= max_downloads:
+                    raise MaxDownloadsReached()
 
     def download(self, url_list):
         """Download a given list of URLs."""

From d231b56717c73ee597d2e077d11b69ed48a1b02d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 14 Aug 2022 18:45:45 +0100
Subject: [PATCH 102/312] [jsinterp] Overhaul JSInterp to handle new YT players
 4c3f79c5, 324f67b9 (#31170)

* back-port from yt-dlp 8f53dc44a0cc1c2d98c35740b9293462c080f5d0, thanks pukkandan
* also support void, improve <</>> precedence, improve expressions in comma-list
* add more tests
---
 test/test_jsinterp.py          |  49 ++-
 test/test_utils.py             |   3 +
 test/test_youtube_signature.py |  13 +
 youtube_dl/compat.py           |  54 ++-
 youtube_dl/jsinterp.py         | 581 ++++++++++++++++++++-------------
 youtube_dl/utils.py            |  47 ++-
 6 files changed, 500 insertions(+), 247 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index acdabffb1..c6c931743 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -19,6 +19,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x3(){return 42;}')
         self.assertEqual(jsi.call_function('x3'), 42)
 
+        jsi = JSInterpreter('function x3(){42}')
+        self.assertEqual(jsi.call_function('x3'), None)
+
         jsi = JSInterpreter('var x5 = function(){return 42;}')
         self.assertEqual(jsi.call_function('x5'), 42)
 
@@ -51,8 +54,11 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return 11 >> 2;}')
         self.assertEqual(jsi.call_function('f'), 2)
 
+        jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
+        self.assertEqual(jsi.call_function('f'), 5)
+
     def test_array_access(self):
-        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
+        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])
 
     def test_parens(self):
@@ -62,6 +68,10 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
         self.assertEqual(jsi.call_function('f'), 9)
 
+    def test_quotes(self):
+        jsi = JSInterpreter(r'function f(){return "a\"\\("}')
+        self.assertEqual(jsi.call_function('f'), r'a"\(')
+
     def test_assignments(self):
         jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
         self.assertEqual(jsi.call_function('f'), 31)
@@ -104,18 +114,29 @@ class TestJSInterpreter(unittest.TestCase):
         }''')
         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
 
+    def test_builtins(self):
+        jsi = JSInterpreter('''
+        function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+        jsi = JSInterpreter('''
+        function x(dt) { return new Date(dt) - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
-        function y(a) { return x() + a; }
+        function y(a) { return x() + (a?a:0); }
         function z() { return y(3); }
         ''')
         self.assertEqual(jsi.call_function('z'), 5)
+        self.assertEqual(jsi.call_function('y'), 2)
 
     def test_for_loop(self):
         # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a }
+        function x() { a=0; for (i=0; i-10; i++) {a++} return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
@@ -156,19 +177,19 @@ class TestJSInterpreter(unittest.TestCase):
 
     def test_for_loop_continue(self):
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
+        function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 0)
 
     def test_for_loop_break(self):
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
+        function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 0)
 
     def test_literal_list(self):
         jsi = JSInterpreter('''
-        function x() { [1, 2, "asdf", [5, 6, 7]][3] }
+        function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
         ''')
         self.assertEqual(jsi.call_function('x'), [5, 6, 7])
 
@@ -177,6 +198,22 @@ class TestJSInterpreter(unittest.TestCase):
         function x() { a=5; a -= 1, a+=3; return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 7)
+        jsi = JSInterpreter('''
+        function x() { a=5; return (a -= 1, a+=3, a); }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 7)
+
+    def test_void(self):
+        jsi = JSInterpreter('''
+        function x() { return void 42; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), None)
+
+    def test_return_function(self):
+        jsi = JSInterpreter('''
+        function x() { return [1, function(){return 1}][1] }
+        ''')
+        self.assertEqual(jsi.call_function('x')([]), 1)
 
 
 if __name__ == '__main__':
diff --git a/test/test_utils.py b/test/test_utils.py
index 259c4763e..f1a748dde 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -370,6 +370,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
         self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+        self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+        self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+        self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index fc5e9828e..6e955e0f0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -90,12 +90,25 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
         'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
     ),
+    (
+        'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+        'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+    ),
+    (
+        'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
+        'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
+    ),
+    (
+        'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
+        'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
+    ),
 ]
 
 
 class TestPlayerInfo(unittest.TestCase):
     def test_youtube_extract_player_info(self):
         PLAYER_URLS = (
+            ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 9f5f85dae..6d2c31a61 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2985,7 +2985,6 @@ except ImportError:
     except ImportError:
         compat_filter = filter
 
-
 try:
     from future_builtins import zip as compat_zip
 except ImportError:  # not 2.6+ or is 3.x
@@ -2995,6 +2994,57 @@ except ImportError:  # not 2.6+ or is 3.x
         compat_zip = zip
 
 
+# method renamed between Py2/3
+try:
+    from itertools import zip_longest as compat_itertools_zip_longest
+except ImportError:
+    from itertools import izip_longest as compat_itertools_zip_longest
+
+
+# new class in collections
+try:
+    from collections import ChainMap as compat_collections_chain_map
+except ImportError:
+    # Py < 3.3
+    class compat_collections_chain_map(compat_collections_abc.MutableMapping):
+
+        maps = [{}]
+
+        def __init__(self, *maps):
+            self.maps = list(maps) or [{}]
+
+        def __getitem__(self, k):
+            for m in self.maps:
+                if k in m:
+                    return m[k]
+            raise KeyError(k)
+
+        def __setitem__(self, k, v):
+            self.maps[0].__setitem__(k, v)
+            return
+
+        def __delitem__(self, k):
+            if k in self.maps[0]:
+                del self.maps[0][k]
+                return
+            raise KeyError(k)
+
+        def __iter__(self):
+            return itertools.chain(*reversed(self.maps))
+
+        def __len__(self):
+            return len(iter(self))
+
+        def new_child(self, m=None, **kwargs):
+            m = m or {}
+            m.update(kwargs)
+            return compat_collections_chain_map(m, *self.maps)
+
+        @property
+        def parents(self):
+            return compat_collections_chain_map(*(self.maps[1:]))
+
+
 if sys.version_info < (3, 3):
     def compat_b64decode(s, *args, **kwargs):
         if isinstance(s, compat_str):
@@ -3031,6 +3081,7 @@ __all__ = [
     'compat_basestring',
     'compat_chr',
     'compat_collections_abc',
+    'compat_collections_chain_map',
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookies',
@@ -3051,6 +3102,7 @@ __all__ = [
     'compat_input',
     'compat_integer_types',
     'compat_itertools_count',
+    'compat_itertools_zip_longest',
     'compat_kwargs',
     'compat_map',
     'compat_numeric_types',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 8eaa911cd..c60a9b3c2 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,42 +1,87 @@
 from __future__ import unicode_literals
 
+import itertools
 import json
+import math
 import operator
 import re
 
 from .utils import (
+    NO_DEFAULT,
     ExtractorError,
+    js_to_json,
     remove_quotes,
+    unified_timestamp,
 )
 from .compat import (
-    compat_collections_abc,
+    compat_collections_chain_map as ChainMap,
+    compat_itertools_zip_longest as zip_longest,
     compat_str,
 )
-MutableMapping = compat_collections_abc.MutableMapping
 
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
 
-class Nonlocal:
-    pass
+# (op, definition) in order of binding priority, tightest first
+# avoid dict to maintain order
+# definition None => Defined in JSInterpreter._operator
+_DOT_OPERATORS = (
+    ('.', None),
+    # TODO: ('?.', None),
+)
 
-
-_OPERATORS = [
+_OPERATORS = (
     ('|', operator.or_),
     ('^', operator.xor),
     ('&', operator.and_),
     ('>>', operator.rshift),
     ('<<', operator.lshift),
-    ('-', operator.sub),
     ('+', operator.add),
-    ('%', operator.mod),
-    ('/', operator.truediv),
+    ('-', operator.sub),
     ('*', operator.mul),
-]
-_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
+    ('/', operator.truediv),
+    ('%', operator.mod),
+)
 
-_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+_COMP_OPERATORS = (
+    ('===', operator.is_),
+    ('==', operator.eq),
+    ('!==', operator.is_not),
+    ('!=', operator.ne),
+    ('<=', operator.le),
+    ('>=', operator.ge),
+    ('<', operator.lt),
+    ('>', operator.gt),
+)
+
+_LOG_OPERATORS = (
+    ('&', operator.and_),
+    ('|', operator.or_),
+    ('^', operator.xor),
+)
+
+_SC_OPERATORS = (
+    ('?', None),
+    ('||', None),
+    ('&&', None),
+    # TODO: ('??', None),
+)
+
+_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
 
 _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
+_QUOTES = '\'"'
+
+
+def _ternary(cndn, if_true=True, if_false=False):
+    """Simulate JS's ternary operator (cndn?if_true:if_false)"""
+    if cndn in (False, None, 0, ''):
+        return if_false
+    try:
+        if math.isnan(cndn):  # NB: NaN cannot be checked by membership
+            return if_false
+    except TypeError:
+        pass
+    return if_true
 
 
 class JS_Break(ExtractorError):
@@ -49,70 +94,77 @@ class JS_Continue(ExtractorError):
         ExtractorError.__init__(self, 'Invalid continue')
 
 
-class LocalNameSpace(MutableMapping):
-    def __init__(self, *stack):
-        self.stack = tuple(stack)
-
-    def __getitem__(self, key):
-        for scope in self.stack:
-            if key in scope:
-                return scope[key]
-        raise KeyError(key)
-
+class LocalNameSpace(ChainMap):
     def __setitem__(self, key, value):
-        for scope in self.stack:
+        for scope in self.maps:
             if key in scope:
                 scope[key] = value
-                break
-        else:
-            self.stack[0][key] = value
-        return value
+                return
+        self.maps[0][key] = value
 
     def __delitem__(self, key):
         raise NotImplementedError('Deleting is not supported')
 
-    def __iter__(self):
-        for scope in self.stack:
-            for scope_item in iter(scope):
-                yield scope_item
-
-    def __len__(self, key):
-        return len(iter(self))
-
     def __repr__(self):
-        return 'LocalNameSpace%s' % (self.stack, )
+        return 'LocalNameSpace%s' % (self.maps, )
 
 
 class JSInterpreter(object):
+    __named_object_counter = 0
+
     def __init__(self, code, objects=None):
-        if objects is None:
-            objects = {}
-        self.code = code
-        self._functions = {}
-        self._objects = objects
-        self.__named_object_counter = 0
+        self.code, self._functions = code, {}
+        self._objects = {} if objects is None else objects
+
+    class Exception(ExtractorError):
+        def __init__(self, msg, *args, **kwargs):
+            expr = kwargs.pop('expr', None)
+            if expr is not None:
+                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
+            super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     def _named_object(self, namespace, obj):
         self.__named_object_counter += 1
-        name = '__youtube_dl_jsinterp_obj%s' % (self.__named_object_counter, )
+        name = '__youtube_dl_jsinterp_obj%d' % (self.__named_object_counter, )
         namespace[name] = obj
         return name
 
     @staticmethod
-    def _separate(expr, delim=',', max_split=None):
+    def _separate(expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
             return
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
-        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
+        start, splits, pos, skipping, delim_len = 0, 0, 0, 0, len(delim) - 1
+        in_quote, escaping = None, False
         for idx, char in enumerate(expr):
-            if char in _MATCHING_PARENS:
-                counters[_MATCHING_PARENS[char]] += 1
-            elif char in counters:
-                counters[char] -= 1
-            if char != delim[pos] or any(counters.values()):
-                pos = 0
+            if not in_quote:
+                if char in _MATCHING_PARENS:
+                    counters[_MATCHING_PARENS[char]] += 1
+                elif char in counters:
+                    counters[char] -= 1
+            if not escaping:
+                if char in _QUOTES and in_quote in (char, None):
+                    in_quote = None if in_quote else char
+                else:
+                    escaping = in_quote and char == '\\'
+            else:
+                escaping = False
+
+            if char != delim[pos] or any(counters.values()) or in_quote:
+                pos = skipping = 0
                 continue
-            elif pos != delim_len:
+            elif skipping > 0:
+                skipping -= 1
+                continue
+            elif pos == 0 and skip_delims:
+                here = expr[idx:]
+                for s in skip_delims if isinstance(skip_delims, (list, tuple)) else [skip_delims]:
+                    if here.startswith(s) and s:
+                        skipping = len(s) - 1
+                        break
+                if skipping > 0:
+                    continue
+            if pos < delim_len:
                 pos += 1
                 continue
             yield expr[start: idx - delim_len]
@@ -122,61 +174,108 @@ class JSInterpreter(object):
                 break
         yield expr[start:]
 
-    @staticmethod
-    def _separate_at_paren(expr, delim):
-        separated = list(JSInterpreter._separate(expr, delim, 1))
+    @classmethod
+    def _separate_at_paren(cls, expr, delim):
+        separated = list(cls._separate(expr, delim, 1))
+
         if len(separated) < 2:
-            raise ExtractorError('No terminating paren {0} in {1}'.format(delim, expr))
+            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
+    @staticmethod
+    def _all_operators():
+        return itertools.chain(
+            _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
+
+    def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
+        if op in ('||', '&&'):
+            if (op == '&&') ^ _ternary(left_val):
+                return left_val  # short circuiting
+        elif op == '?':
+            right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1))
+
+        right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
+        opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
+        if not opfunc:
+            return right_val
+
+        try:
+            return opfunc(left_val, right_val)
+        except Exception as e:
+            raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
+
+    def _index(self, obj, idx):
+        if idx == 'length':
+            return len(obj)
+        try:
+            return obj[int(idx)] if isinstance(obj, list) else obj[idx]
+        except Exception as e:
+            raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
+
+    def _dump(self, obj, namespace):
+        try:
+            return json.dumps(obj)
+        except TypeError:
+            return self._named_object(namespace, obj)
+
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
-            raise ExtractorError('Recursion limit reached')
+            raise self.Exception('Recursion limit reached')
+        allow_recursion -= 1
 
-        sub_statements = list(self._separate(stmt, ';'))
-        stmt = (sub_statements or ['']).pop()
+        should_return = False
+        sub_statements = list(self._separate(stmt, ';')) or ['']
+        expr = stmt = sub_statements.pop().strip()
         for sub_stmt in sub_statements:
-            ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
-            if should_abort:
-                return ret
+            ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
+            if should_return:
+                return ret, should_return
 
-        should_abort = False
-        stmt = stmt.lstrip()
-        stmt_m = re.match(r'var\s', stmt)
-        if stmt_m:
-            expr = stmt[len(stmt_m.group(0)):]
-        else:
-            return_m = re.match(r'return(?:\s+|$)', stmt)
-            if return_m:
-                expr = stmt[len(return_m.group(0)):]
-                should_abort = True
+        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|$)', stmt)
+        if m:
+            expr = stmt[len(m.group(0)):].strip()
+            should_return = not m.group('var')
+        if not expr:
+            return None, should_return
+
+        if expr[0] in _QUOTES:
+            inner, outer = self._separate(expr, expr[0], 1)
+            inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
+            if not outer:
+                return inner, should_return
+            expr = self._named_object(local_vars, inner) + outer
+
+        if expr.startswith('new '):
+            obj = expr[4:]
+            if obj.startswith('Date('):
+                left, right = self._separate_at_paren(obj[4:], ')')
+                left = self.interpret_expression(left, local_vars, allow_recursion)
+                expr = unified_timestamp(left, False)
+                if not expr:
+                    raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
+                expr = self._dump(int(expr * 1000), local_vars) + right
             else:
-                # Try interpreting it as an expression
-                expr = stmt
+                raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
 
-        v = self.interpret_expression(expr, local_vars, allow_recursion)
-        return v, should_abort
-
-    def interpret_expression(self, expr, local_vars, allow_recursion):
-        expr = expr.strip()
-        if expr == '':  # Empty expression
-            return None
+        if expr.startswith('void '):
+            left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
+            return None, should_return
 
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr, '}')
-            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
+            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
-                return inner
+                return inner, should_abort or should_return
             else:
-                expr = json.dumps(inner) + outer
+                expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('('):
             inner, outer = self._separate_at_paren(expr, ')')
-            inner = self.interpret_expression(inner, local_vars, allow_recursion)
-            if not outer:
-                return inner
+            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+            if not outer or should_abort:
+                return inner, should_abort or should_return
             else:
-                expr = json.dumps(inner) + outer
+                expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('['):
             inner, outer = self._separate_at_paren(expr, ']')
@@ -185,57 +284,53 @@ class JSInterpreter(object):
                 for item in self._separate(inner)])
             expr = name + outer
 
-        m = re.match(r'try\s*', expr)
-        if m:
+        m = re.match(r'(?P<try>try|finally)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+        md = m.groupdict() if m else {}
+        if md.get('try'):
             if expr[m.end()] == '{':
                 try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
             else:
                 try_expr, expr = expr[m.end() - 1:], ''
-            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
+            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
             if should_abort:
-                return ret
-            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+                return ret, True
+            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            return ret, should_abort or should_return
 
-        m = re.match(r'(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
-        md = m.groupdict() if m else {}
-        if md.get('catch'):
+        elif md.get('catch'):
             # We ignore the catch block
             _, expr = self._separate_at_paren(expr, '}')
-            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            return ret, should_abort or should_return
 
         elif md.get('for'):
-            def raise_constructor_error(c):
-                raise ExtractorError(
-                    'Premature return in the initialization of a for loop in {0!r}'.format(c))
-
             constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining, '}')
             else:
-                m = re.match(r'switch\s*\(', remaining)  # FIXME
-                if m:
-                    switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+                switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
+                if switch_m:
+                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
                     body, expr = self._separate_at_paren(remaining, '}')
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
                     body, expr = remaining, ''
             start, cndn, increment = self._separate(constructor, ';')
-            if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
-                raise_constructor_error(constructor)
+            self.interpret_expression(start, local_vars, allow_recursion)
             while True:
-                if not self.interpret_expression(cndn, local_vars, allow_recursion):
+                if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                     break
                 try:
-                    ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1)
+                    ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                     if should_abort:
-                        return ret
+                        return ret, True
                 except JS_Break:
                     break
                 except JS_Continue:
                     pass
-                if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]:
-                    raise_constructor_error(constructor)
-            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+                self.interpret_expression(increment, local_vars, allow_recursion)
+            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            return ret, should_abort or should_return
 
         elif md.get('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
@@ -245,7 +340,7 @@ class JSInterpreter(object):
             for default in (False, True):
                 matched = False
                 for item in items:
-                    case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
+                    case, stmt = (i.strip() for i in self._separate(item, ':', 1))
                     if default:
                         matched = matched or case == 'default'
                     elif not matched:
@@ -254,24 +349,28 @@ class JSInterpreter(object):
                     if not matched:
                         continue
                     try:
-                        ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
+                        ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
                         if should_abort:
                             return ret
                     except JS_Break:
                         break
                 if matched:
                     break
-            return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
+            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            return ret, should_abort or should_return
 
         # Comma separated statements
         sub_expressions = list(self._separate(expr))
-        expr = sub_expressions.pop().strip() if sub_expressions else ''
-        for sub_expr in sub_expressions:
-            self.interpret_expression(sub_expr, local_vars, allow_recursion)
+        if len(sub_expressions) > 1:
+            for sub_expr in sub_expressions:
+                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+            return ret, False
 
         for m in re.finditer(r'''(?x)
-                (?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
-                (?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)''' % globals(), expr):
+                (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
+                (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)'''.format(**globals()), expr):
             var = m.group('var1') or m.group('var2')
             start, end = m.span()
             sign = m.group('pre_sign') or m.group('post_sign')
@@ -279,85 +378,87 @@ class JSInterpreter(object):
             local_vars[var] += 1 if sign[0] == '+' else -1
             if m.group('pre_sign'):
                 ret = local_vars[var]
-            expr = expr[:start] + json.dumps(ret) + expr[end:]
+            expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
 
-        for op, opfunc in _ASSIGN_OPERATORS:
-            m = re.match(r'''(?x)
-                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
-                \s*%s
-                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
-            if not m:
-                continue
-            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
+        if not expr:
+            return None, should_return
 
-            if m.groupdict().get('index'):
-                lvar = local_vars[m.group('out')]
-                idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
-                if not isinstance(idx, int):
-                    raise ExtractorError('List indices must be integers: %s' % (idx, ))
-                cur = lvar[idx]
-                val = opfunc(cur, right_val)
-                lvar[idx] = val
-                return val
-            else:
-                cur = local_vars.get(m.group('out'))
-                val = opfunc(cur, right_val)
-                local_vars[m.group('out')] = val
-                return val
+        m = re.match(r'''(?x)
+            (?P<assign>
+                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+                (?P<op>{_OPERATOR_RE})?
+                =(?P<expr>.*)$
+            )|(?P<return>
+                (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
+            )|(?P<indexing>
+                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+            )|(?P<attribute>
+                (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+            )|(?P<function>
+                (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
+            )'''.format(**globals()), expr)
+        md = m.groupdict() if m else {}
+        if md.get('assign'):
+            left_val = local_vars.get(m.group('out'))
 
-        if expr.isdigit():
-            return int(expr)
+            if not m.group('index'):
+                local_vars[m.group('out')] = self._operator(
+                    m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
+                return local_vars[m.group('out')], should_return
+            elif left_val is None:
+                raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
 
-        if expr == 'break':
+            idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+            if not isinstance(idx, (int, float)):
+                raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
+            idx = int(idx)
+            left_val[idx] = self._operator(
+                m.group('op'), left_val[idx], m.group('expr'), expr, local_vars, allow_recursion)
+            return left_val[idx], should_return
+
+        elif expr.isdigit():
+            return int(expr), should_return
+
+        elif expr == 'break':
             raise JS_Break()
         elif expr == 'continue':
             raise JS_Continue()
 
-        var_m = re.match(
-            r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
-            expr)
-        if var_m:
-            return local_vars[var_m.group('name')]
+        elif md.get('return'):
+            return local_vars[m.group('name')], should_return
 
         try:
-            return json.loads(expr)
+            ret = json.loads(js_to_json(expr))  # strict=True)
+            if not md.get('attribute'):
+                return ret, should_return
         except ValueError:
             pass
 
-        m = re.match(
-            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
-        if m:
+        if md.get('indexing'):
             val = local_vars[m.group('in')]
             idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
-            return val[idx]
+            return self._index(val, idx), should_return
 
-        def raise_expr_error(where, op, exp):
-            raise ExtractorError('Premature {0} return of {1} in {2!r}'.format(where, op, exp))
-
-        for op, opfunc in _OPERATORS:
-            separated = list(self._separate(expr, op))
+        for op, _ in self._all_operators():
+            # hackety: </> have higher priority than <</>>, but don't confuse them
+            skip_delim = (op + op) if op in ('<', '>') else None
+            separated = list(self._separate(expr, op, skip_delims=skip_delim))
             if len(separated) < 2:
                 continue
-            right_val = separated.pop()
-            left_val = op.join(separated)
-            left_val, should_abort = self.interpret_statement(
-                left_val, local_vars, allow_recursion - 1)
-            if should_abort:
-                raise_expr_error('left-side', op, expr)
-            right_val, should_abort = self.interpret_statement(
-                right_val, local_vars, allow_recursion - 1)
-            if should_abort:
-                raise_expr_error('right-side', op, expr)
-            return opfunc(left_val or 0, right_val)
 
-        m = re.match(
-            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
-            expr)
-        if m:
+            right_expr = separated.pop()
+            while op == '-' and len(separated) > 1 and not separated[-1].strip():
+                right_expr = '-' + right_expr
+                separated.pop()
+            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+            return self._operator(op, 0 if left_val is None else left_val,
+                                  right_expr, expr, local_vars, allow_recursion), should_return
+
+        if md.get('attribute'):
             variable = m.group('var')
-            nl = Nonlocal()
-
-            nl.member = remove_quotes(m.group('member') or m.group('member2'))
+            member = m.group('member')
+            if not member:
+                member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
             arg_str = expr[m.end():]
             if arg_str.startswith('('):
                 arg_str, remaining = self._separate_at_paren(arg_str, ')')
@@ -367,25 +468,24 @@ class JSInterpreter(object):
             def assertion(cndn, msg):
                 """ assert, but without risk of getting optimized out """
                 if not cndn:
-                    raise ExtractorError('{0} {1}: {2}'.format(nl.member, msg, expr))
+                    raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr)
 
             def eval_method():
-                # nonlocal member
-                member = nl.member
-                if variable == 'String':
-                    obj = compat_str
-                elif variable in local_vars:
-                    obj = local_vars[variable]
-                else:
+                if (variable, member) == ('console', 'debug'):
+                    return
+                types = {
+                    'String': compat_str,
+                    'Math': float,
+                }
+                obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
+                if obj is NO_DEFAULT:
                     if variable not in self._objects:
                         self._objects[variable] = self.extract_object(variable)
                     obj = self._objects[variable]
 
+                # Member access
                 if arg_str is None:
-                    # Member access
-                    if member == 'length':
-                        return len(obj)
-                    return obj[member]
+                    return self._index(obj, member)
 
                 # Function call
                 argvals = [
@@ -396,12 +496,17 @@ class JSInterpreter(object):
                     if member == 'fromCharCode':
                         assertion(argvals, 'takes one or more arguments')
                         return ''.join(map(chr, argvals))
-                    raise ExtractorError('Unsupported string method %s' % (member, ))
+                    raise self.Exception('Unsupported string method ' + member, expr=expr)
+                elif obj == float:
+                    if member == 'pow':
+                        assertion(len(argvals) == 2, 'takes two arguments')
+                        return argvals[0] ** argvals[1]
+                    raise self.Exception('Unsupported Math method ' + member, expr=expr)
 
                 if member == 'split':
                     assertion(argvals, 'takes one or more arguments')
-                    assertion(argvals == [''], 'with arguments is not implemented')
-                    return list(obj)
+                    assertion(len(argvals) == 1, 'with limit argument is not implemented')
+                    return obj.split(argvals[0]) if argvals[0] else list(obj)
                 elif member == 'join':
                     assertion(isinstance(obj, list), 'must be applied on a list')
                     assertion(len(argvals) == 1, 'takes exactly one argument')
@@ -447,7 +552,7 @@ class JSInterpreter(object):
                     assertion(argvals, 'takes one or more arguments')
                     assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
                     f, this = (argvals + [''])[:2]
-                    return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)]
+                    return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
                 elif member == 'indexOf':
                     assertion(argvals, 'takes one or more arguments')
                     assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
@@ -457,32 +562,35 @@ class JSInterpreter(object):
                     except ValueError:
                         return -1
 
-                if isinstance(obj, list):
-                    member = int(member)
-                    nl.member = member
-                return obj[member](argvals)
+                idx = int(member) if isinstance(obj, list) else member
+                return obj[idx](argvals, allow_recursion=allow_recursion)
 
             if remaining:
-                return self.interpret_expression(
+                ret, should_abort = self.interpret_statement(
                     self._named_object(local_vars, eval_method()) + remaining,
                     local_vars, allow_recursion)
+                return ret, should_return or should_abort
             else:
-                return eval_method()
+                return eval_method(), should_return
 
-        m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
-        if m:
-            fname = m.group('func')
-            argvals = tuple([
-                int(v) if v.isdigit() else local_vars[v]
-                for v in self._separate(m.group('args'))])
+        elif md.get('function'):
+            fname = m.group('fname')
+            argvals = [self.interpret_expression(v, local_vars, allow_recursion)
+                       for v in self._separate(m.group('args'))]
             if fname in local_vars:
-                return local_vars[fname](argvals)
+                return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
             elif fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
-            return self._functions[fname](argvals)
+            return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
 
-        if expr:
-            raise ExtractorError('Unsupported JS expression %r' % expr)
+        raise self.Exception(
+            'Unsupported JS expression ' + (expr[:40] if expr != stmt else ''), expr=stmt)
+
+    def interpret_expression(self, expr, local_vars, allow_recursion):
+        ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion)
+        if should_return:
+            raise self.Exception('Cannot return from an expression', expr)
+        return ret
 
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
@@ -494,15 +602,17 @@ class JSInterpreter(object):
                 }\s*;
             ''' % (re.escape(objname), _FUNC_NAME_RE),
             self.code)
+        if not obj_m:
+            raise self.Exception('Could not find object ' + objname)
         fields = obj_m.group('fields')
         # Currently, it only supports function definitions
         fields_m = re.finditer(
             r'''(?x)
-                (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
-            ''' % _FUNC_NAME_RE,
+                (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
+            ''' % (_FUNC_NAME_RE, _NAME_RE),
             fields)
         for f in fields_m:
-            argnames = f.group('args').split(',')
+            argnames = self.build_arglist(f.group('args'))
             obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
 
         return obj
@@ -510,15 +620,19 @@ class JSInterpreter(object):
     def extract_function_code(self, funcname):
         """ @returns argnames, code """
         func_m = re.search(
-            r'''(?x)
-                (?:function\s+%(f_n)s|[{;,]\s*%(f_n)s\s*=\s*function|var\s+%(f_n)s\s*=\s*function)\s*
+            r'''(?xs)
+                (?:
+                    function\s+%(name)s|
+                    [{;,]\s*%(name)s\s*=\s*function|
+                    (?:var|const|let)\s+%(name)s\s*=\s*function
+                )\s*
                 \((?P<args>[^)]*)\)\s*
-                (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % {'f_n': re.escape(funcname), },
+                (?P<code>{.+})''' % {'name': re.escape(funcname)},
             self.code)
         code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
         if func_m is None:
-            raise ExtractorError('Could not find JS function %r' % funcname)
-        return func_m.group('args').split(','), code
+            raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
+        return self.build_arglist(func_m.group('args')), code
 
     def extract_function(self, funcname):
         return self.extract_function_from_code(*self.extract_function_code(funcname))
@@ -534,7 +648,7 @@ class JSInterpreter(object):
             name = self._named_object(
                 local_vars,
                 self.extract_function_from_code(
-                    [x.strip() for x in mobj.group('args').split(',')],
+                    self.build_arglist(mobj.group('args')),
                     body, local_vars, *global_stack))
             code = code[:start] + name + remaining
         return self.build_function(argnames, code, local_vars, *global_stack)
@@ -542,17 +656,22 @@ class JSInterpreter(object):
     def call_function(self, funcname, *args):
         return self.extract_function(funcname)(args)
 
+    @classmethod
+    def build_arglist(cls, arg_text):
+        if not arg_text:
+            return []
+        return list(filter(None, (x.strip() or None for x in cls._separate(arg_text))))
+
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]
-        local_vars = global_stack.pop(0)
+        argnames = tuple(argnames)
 
-        def resf(args, **kwargs):
-            local_vars.update(dict(zip(argnames, args)))
-            local_vars.update(kwargs)
-            var_stack = LocalNameSpace(local_vars, *global_stack)
-            for stmt in self._separate(code.replace('\n', ''), ';'):
-                ret, should_abort = self.interpret_statement(stmt, var_stack)
-                if should_abort:
-                    break
-            return ret
+        def resf(args, kwargs={}, allow_recursion=100):
+            global_stack[0].update(
+                zip_longest(argnames, args, fillvalue=None))
+            global_stack[0].update(kwargs)
+            var_stack = LocalNameSpace(*global_stack)
+            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
+            if should_abort:
+                return ret
         return resf
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4e00317f1..a5f584ec5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1696,6 +1696,17 @@ MONTH_NAMES = {
         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 }
 
+# Timezone names for RFC2822 obs-zone
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
+    'EST': -5, 'EDT': -4,  # Eastern
+    'CST': -6, 'CDT': -5,  # Central
+    'MST': -7, 'MDT': -6,  # Mountain
+    'PST': -8, 'PDT': -7   # Pacific
+}
+
 KNOWN_EXTENSIONS = (
     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
     'flv', 'f4v', 'f4a', 'f4b',
@@ -1735,12 +1746,17 @@ DATE_FORMATS = (
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',
+    '%Y%m%d%H%M',
+    '%Y%m%d%H%M%S',
+    '%Y%m%d',
     '%Y-%m-%d %H:%M',
     '%Y-%m-%d %H:%M:%S',
     '%Y-%m-%d %H:%M:%S.%f',
+    '%Y-%m-%d %H:%M:%S:%f',
     '%d.%m.%Y %H:%M',
     '%d.%m.%Y %H.%M',
     '%Y-%m-%dT%H:%M:%SZ',
@@ -1753,6 +1769,7 @@ DATE_FORMATS = (
     '%b %d %Y at %H:%M:%S',
     '%B %d %Y at %H:%M',
     '%B %d %Y at %H:%M:%S',
+    '%H:%M %d-%b-%Y',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -1763,6 +1780,7 @@ DATE_FORMATS_DAY_FIRST.extend([
     '%d/%m/%Y',
     '%d/%m/%y',
     '%d/%m/%Y %H:%M:%S',
+    '%d-%m-%Y %H:%M',
 ])
 
 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@@ -2966,10 +2984,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
 def extract_timezone(date_str):
     m = re.search(
-        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
-        date_str)
+        r'''(?x)
+            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
+            (?P<tz>Z|                                            # just the UTC Z, or
+                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
+                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+                   [ ]?                                          # optional space
+                (?P<sign>\+|-)                                   # +/-
+                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
+            $)
+        ''', date_str)
     if not m:
-        timezone = datetime.timedelta()
+        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+        if timezone is not None:
+            date_str = date_str[:-len(m.group('tz'))]
+        timezone = datetime.timedelta(hours=timezone or 0)
     else:
         date_str = date_str[:-len(m.group('tz'))]
         if not m.group('sign'):
@@ -3037,7 +3067,8 @@ def unified_timestamp(date_str, day_first=True):
     if date_str is None:
         return None
 
-    date_str = re.sub(r'[,|]', '', date_str)
+    date_str = re.sub(r'\s+', ' ', re.sub(
+        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
 
     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
     timezone, date_str = extract_timezone(date_str)
@@ -3063,7 +3094,7 @@ def unified_timestamp(date_str, day_first=True):
             pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600
+        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
 
 
 def determine_ext(url, default_ext='unknown_video'):
@@ -3673,13 +3704,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
-    if v == '':
-        v = None
-    if v is None:
+    if v in (None, ''):
         return default
     try:
         return int(v) * invscale // scale
-    except (ValueError, TypeError):
+    except (ValueError, TypeError, OverflowError):
         return default
 
 

From e52e8b8111cf7ca27daef184bacd926865e951b1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Aug 2022 16:45:04 +0100
Subject: [PATCH 103/312] [postprocessor] Don't replace existing value with
 null metadata parsed from title

---
 youtube_dl/postprocessor/metadatafromtitle.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
index f5c14d974..6cd5bb70f 100644
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -40,6 +40,8 @@ class MetadataFromTitlePP(PostProcessor):
                 % self._titleformat)
             return [], info
         for attribute, value in match.groupdict().items():
+            if value is None:
+                continue
             info[attribute] = value
             self._downloader.to_screen(
                 '[fromtitle] parsed %s: %s'

From b0a60ce2032172aeaaf27fe3866ab72768f10cb2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 17 Aug 2022 14:22:02 +0100
Subject: [PATCH 104/312] [jsinterp] Improve JS language support (#31175)

* operator ??
* operator ?.
* operator **
* accurate operator functions
* `undefined` handling
* object literals {a: 1, "b": expr}
* more tests for weird JS comparisons: see https://github.com/ytdl-org/youtube-dl/issues/31173#issuecomment-1217854397.
---
 test/test_jsinterp.py          | 114 ++++++++++++++++++++
 test/test_youtube_signature.py |   4 +
 youtube_dl/jsinterp.py         | 189 ++++++++++++++++++++++++++-------
 3 files changed, 267 insertions(+), 40 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c6c931743..328941e09 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -8,7 +8,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import math
+
 from youtube_dl.jsinterp import JSInterpreter
+undefined = JSInterpreter.undefined
 
 
 class TestJSInterpreter(unittest.TestCase):
@@ -48,6 +51,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return 1 << 5;}')
         self.assertEqual(jsi.call_function('f'), 32)
 
+        jsi = JSInterpreter('function f(){return 2 ** 5}')
+        self.assertEqual(jsi.call_function('f'), 32)
+
         jsi = JSInterpreter('function f(){return 19 & 21;}')
         self.assertEqual(jsi.call_function('f'), 17)
 
@@ -57,6 +63,15 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
         self.assertEqual(jsi.call_function('f'), 5)
 
+        jsi = JSInterpreter('function f(){return 1 == 2}')
+        self.assertEqual(jsi.call_function('f'), False)
+
+        jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+        jsi = JSInterpreter('function f(){return 0 ?? 42;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
     def test_array_access(self):
         jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -203,6 +218,11 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 7)
 
+        jsi = JSInterpreter('''
+        function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+
     def test_void(self):
         jsi = JSInterpreter('''
         function x() { return void 42; }
@@ -215,6 +235,100 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x')([]), 1)
 
+    def test_null(self):
+        jsi = JSInterpreter('''
+        function x() { return null; }
+        ''')
+        self.assertIs(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('''
+        function x() { return [null > 0, null < 0, null == 0, null === 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [null >= 0, null <= 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [True, True])
+
+    def test_undefined(self):
+        jsi = JSInterpreter('''
+        function x() { return undefined === undefined; }
+        ''')
+        self.assertTrue(jsi.call_function('x'))
+
+        jsi = JSInterpreter('''
+        function x() { return undefined; }
+        ''')
+        self.assertIs(jsi.call_function('x'), undefined)
+
+        jsi = JSInterpreter('''
+        function x() { let v; return v; }
+        ''')
+        self.assertIs(jsi.call_function('x'), undefined)
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [True, True, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined >= 0, undefined <= 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, True, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, True, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
+        ''')
+        for y in jsi.call_function('x'):
+            self.assertTrue(math.isnan(y))
+
+        jsi = JSInterpreter('''
+        function x() { let v; return v**0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 1)
+
+        jsi = JSInterpreter('''
+        function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined])
+
+        jsi = JSInterpreter('function x(){return undefined ?? 42; }')
+        self.assertEqual(jsi.call_function('x'), 42)
+
+    def test_object(self):
+        jsi = JSInterpreter('''
+        function x() { return {}; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), {})
+        jsi = JSInterpreter('''
+        function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [42, 0])
+        jsi = JSInterpreter('''
+        function x() { let a; return a?.qq; }
+        ''')
+        self.assertIs(jsi.call_function('x'), undefined)
+        jsi = JSInterpreter('''
+        function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
+        ''')
+        self.assertIs(jsi.call_function('x'), undefined)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 6e955e0f0..4d756dad3 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -102,6 +102,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
         'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
     ),
+    (
+        'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
+        'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c60a9b3c2..8e119d08a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -7,7 +7,6 @@ import operator
 import re
 
 from .utils import (
-    NO_DEFAULT,
     ExtractorError,
     js_to_json,
     remove_quotes,
@@ -21,6 +20,70 @@ from .compat import (
 
 _NAME_RE = r'[a-zA-Z_$][\w$]*'
 
+_UNDEFINED = object()
+
+
+def _js_bit_op(op):
+
+    def wrapped(a, b):
+        def zeroise(x):
+            return 0 if x in (None, _UNDEFINED) else x
+        return op(zeroise(a), zeroise(b))
+
+    return wrapped
+
+
+def _js_arith_op(op):
+
+    def wrapped(a, b):
+        if _UNDEFINED in (a, b):
+            return float('nan')
+        return op(a or 0, b or 0)
+
+    return wrapped
+
+
+def _js_div(a, b):
+    if _UNDEFINED in (a, b) or not (a and b):
+        return float('nan')
+    return float('inf') if not b else operator.truediv(a or 0, b)
+
+
+def _js_mod(a, b):
+    if _UNDEFINED in (a, b) or not b:
+        return float('nan')
+    return (a or 0) % b
+
+
+def _js_exp(a, b):
+    if not b:
+        # even 0 ** 0 !!
+        return 1
+    if _UNDEFINED in (a, b):
+        return float('nan')
+    return (a or 0) ** b
+
+
+def _js_eq_op(op):
+
+    def wrapped(a, b):
+        if set((a, b)) <= set((None, _UNDEFINED)):
+            return op(a, a)
+        return op(a, b)
+
+    return wrapped
+
+
+def _js_comp_op(op):
+
+    def wrapped(a, b):
+        if _UNDEFINED in (a, b):
+            return False
+        return op(a or 0, b or 0)
+
+    return wrapped
+
+
 # (op, definition) in order of binding priority, tightest first
 # avoid dict to maintain order
 # definition None => Defined in JSInterpreter._operator
@@ -30,40 +93,38 @@ _DOT_OPERATORS = (
 )
 
 _OPERATORS = (
-    ('|', operator.or_),
-    ('^', operator.xor),
-    ('&', operator.and_),
-    ('>>', operator.rshift),
-    ('<<', operator.lshift),
-    ('+', operator.add),
-    ('-', operator.sub),
-    ('*', operator.mul),
-    ('/', operator.truediv),
-    ('%', operator.mod),
+    ('>>', _js_bit_op(operator.rshift)),
+    ('<<', _js_bit_op(operator.lshift)),
+    ('+', _js_arith_op(operator.add)),
+    ('-', _js_arith_op(operator.sub)),
+    ('*', _js_arith_op(operator.mul)),
+    ('/', _js_div),
+    ('%', _js_mod),
+    ('**', _js_exp),
 )
 
 _COMP_OPERATORS = (
     ('===', operator.is_),
-    ('==', operator.eq),
+    ('==', _js_eq_op(operator.eq)),
     ('!==', operator.is_not),
-    ('!=', operator.ne),
-    ('<=', operator.le),
-    ('>=', operator.ge),
-    ('<', operator.lt),
-    ('>', operator.gt),
+    ('!=', _js_eq_op(operator.ne)),
+    ('<=', _js_comp_op(operator.le)),
+    ('>=', _js_comp_op(operator.ge)),
+    ('<', _js_comp_op(operator.lt)),
+    ('>', _js_comp_op(operator.gt)),
 )
 
 _LOG_OPERATORS = (
-    ('&', operator.and_),
-    ('|', operator.or_),
-    ('^', operator.xor),
+    ('|', _js_bit_op(operator.or_)),
+    ('^', _js_bit_op(operator.xor)),
+    ('&', _js_bit_op(operator.and_)),
 )
 
 _SC_OPERATORS = (
     ('?', None),
+    ('??', None),
     ('||', None),
     ('&&', None),
-    # TODO: ('??', None),
 )
 
 _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
@@ -74,7 +135,7 @@ _QUOTES = '\'"'
 
 def _ternary(cndn, if_true=True, if_false=False):
     """Simulate JS's ternary operator (cndn?if_true:if_false)"""
-    if cndn in (False, None, 0, ''):
+    if cndn in (False, None, 0, '', _UNDEFINED):
         return if_false
     try:
         if math.isnan(cndn):  # NB: NaN cannot be checked by membership
@@ -95,6 +156,12 @@ class JS_Continue(ExtractorError):
 
 
 class LocalNameSpace(ChainMap):
+    def __getitem__(self, key):
+        try:
+            return super(LocalNameSpace, self).__getitem__(key)
+        except KeyError:
+            return _UNDEFINED
+
     def __setitem__(self, key, value):
         for scope in self.maps:
             if key in scope:
@@ -105,6 +172,13 @@ class LocalNameSpace(ChainMap):
     def __delitem__(self, key):
         raise NotImplementedError('Deleting is not supported')
 
+    def __contains__(self, key):
+        try:
+            super(LocalNameSpace, self).__getitem__(key)
+            return True
+        except KeyError:
+            return False
+
     def __repr__(self):
         return 'LocalNameSpace%s' % (self.maps, )
 
@@ -112,6 +186,8 @@ class LocalNameSpace(ChainMap):
 class JSInterpreter(object):
     __named_object_counter = 0
 
+    undefined = _UNDEFINED
+
     def __init__(self, code, objects=None):
         self.code, self._functions = code, {}
         self._objects = {} if objects is None else objects
@@ -185,12 +261,16 @@ class JSInterpreter(object):
     @staticmethod
     def _all_operators():
         return itertools.chain(
+            # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
             _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
 
     def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
         if op in ('||', '&&'):
             if (op == '&&') ^ _ternary(left_val):
                 return left_val  # short circuiting
+        elif op == '??':
+            if left_val not in (None, self.undefined):
+                return left_val
         elif op == '?':
             right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1))
 
@@ -204,12 +284,14 @@ class JSInterpreter(object):
         except Exception as e:
             raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
 
-    def _index(self, obj, idx):
+    def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
             return len(obj)
         try:
             return obj[int(idx)] if isinstance(obj, list) else obj[idx]
         except Exception as e:
+            if allow_undefined:
+                return self.undefined
             raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
@@ -249,8 +331,8 @@ class JSInterpreter(object):
             obj = expr[4:]
             if obj.startswith('Date('):
                 left, right = self._separate_at_paren(obj[4:], ')')
-                left = self.interpret_expression(left, local_vars, allow_recursion)
-                expr = unified_timestamp(left, False)
+                expr = unified_timestamp(
+                    self.interpret_expression(left, local_vars, allow_recursion), False)
                 if not expr:
                     raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
                 expr = self._dump(int(expr * 1000), local_vars) + right
@@ -263,6 +345,14 @@ class JSInterpreter(object):
 
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr, '}')
+            # try for object expression
+            sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
+            if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
+                return dict(
+                    (key_expr if re.match(_NAME_RE, key_expr) else key_expr,
+                     self.interpret_expression(val_expr, local_vars, allow_recursion))
+                    for key_expr, val_expr in sub_expressions), should_return
+            # or statement list
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
@@ -387,13 +477,13 @@ class JSInterpreter(object):
             (?P<assign>
                 (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
                 (?P<op>{_OPERATOR_RE})?
-                =(?P<expr>.*)$
+                =(?!=)(?P<expr>.*)$
             )|(?P<return>
                 (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
             )|(?P<indexing>
                 (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
             )|(?P<attribute>
-                (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+                (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
             )|(?P<function>
                 (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
             )'''.format(**globals()), expr)
@@ -405,7 +495,7 @@ class JSInterpreter(object):
                 local_vars[m.group('out')] = self._operator(
                     m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
                 return local_vars[m.group('out')], should_return
-            elif left_val is None:
+            elif left_val in (None, self.undefined):
                 raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
 
             idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
@@ -424,6 +514,9 @@ class JSInterpreter(object):
         elif expr == 'continue':
             raise JS_Continue()
 
+        elif expr == 'undefined':
+            return self.undefined, should_return
+
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
 
@@ -441,7 +534,9 @@ class JSInterpreter(object):
 
         for op, _ in self._all_operators():
             # hackety: </> have higher priority than <</>>, but don't confuse them
-            skip_delim = (op + op) if op in ('<', '>') else None
+            skip_delim = (op + op) if op in '<>*?' else None
+            if op == '?':
+                skip_delim = (skip_delim, '?.')
             separated = list(self._separate(expr, op, skip_delims=skip_delim))
             if len(separated) < 2:
                 continue
@@ -451,12 +546,10 @@ class JSInterpreter(object):
                 right_expr = '-' + right_expr
                 separated.pop()
             left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
-            return self._operator(op, 0 if left_val is None else left_val,
-                                  right_expr, expr, local_vars, allow_recursion), should_return
+            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
 
         if md.get('attribute'):
-            variable = m.group('var')
-            member = m.group('member')
+            variable, member, nullish = m.group('var', 'member', 'nullish')
             if not member:
                 member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
             arg_str = expr[m.end():]
@@ -477,15 +570,24 @@ class JSInterpreter(object):
                     'String': compat_str,
                     'Math': float,
                 }
-                obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
-                if obj is NO_DEFAULT:
-                    if variable not in self._objects:
-                        self._objects[variable] = self.extract_object(variable)
-                    obj = self._objects[variable]
+                obj = local_vars.get(variable)
+                if obj in (self.undefined, None):
+                    obj = types.get(variable, self.undefined)
+                if obj is self.undefined:
+                    try:
+                        if variable not in self._objects:
+                            self._objects[variable] = self.extract_object(variable)
+                        obj = self._objects[variable]
+                    except self.Exception:
+                        if not nullish:
+                            raise
+
+                if nullish and obj is self.undefined:
+                    return self.undefined
 
                 # Member access
                 if arg_str is None:
-                    return self._index(obj, member)
+                    return self._index(obj, member, nullish)
 
                 # Function call
                 argvals = [
@@ -660,7 +762,14 @@ class JSInterpreter(object):
     def build_arglist(cls, arg_text):
         if not arg_text:
             return []
-        return list(filter(None, (x.strip() or None for x in cls._separate(arg_text))))
+
+        def valid_arg(y):
+            y = y.strip()
+            if not y:
+                raise cls.Exception('Missing arg in "%s"' % (arg_text, ))
+            return y
+
+        return [valid_arg(x) for x in cls._separate(arg_text)]
 
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]

From 538ec65ba7634bb9ad9f8eb4ce72713c673969dc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 Aug 2022 11:45:04 +0100
Subject: [PATCH 105/312] [jsinterp] Handle regexp literals and throw/catch
 execution (#31182)

* based on https://github.com/yt-dlp/yt-dlp/commit/f6ca640b122239d5ab215f8c2564efb7ac3e8c65, thanks pukkandan
* adds parse support for regexp flags
---
 test/test_jsinterp.py          |  21 +++++
 test/test_youtube_signature.py |   4 +
 youtube_dl/jsinterp.py         | 136 +++++++++++++++++++++++++++------
 3 files changed, 139 insertions(+), 22 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 328941e09..faddf00d5 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -9,6 +9,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import math
+import re
 
 from youtube_dl.jsinterp import JSInterpreter
 undefined = JSInterpreter.undefined
@@ -316,19 +317,39 @@ class TestJSInterpreter(unittest.TestCase):
         function x() { return {}; }
         ''')
         self.assertEqual(jsi.call_function('x'), {})
+
         jsi = JSInterpreter('''
         function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
         ''')
         self.assertEqual(jsi.call_function('x'), [42, 0])
+
         jsi = JSInterpreter('''
         function x() { let a; return a?.qq; }
         ''')
         self.assertIs(jsi.call_function('x'), undefined)
+
         jsi = JSInterpreter('''
         function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
         ''')
         self.assertIs(jsi.call_function('x'), undefined)
 
+    def test_regex(self):
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/; }
+        ''')
+        self.assertIs(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/; return a; }
+        ''')
+        # Pythons disagree on the type of a pattern
+        self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile(''))))
+
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/i; return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x').flags & re.I, re.I)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4d756dad3..43e22388d 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -106,6 +106,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
         'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
     ),
+    (
+        'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
+        'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 8e119d08a..48c27a1c0 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -7,6 +7,7 @@ import operator
 import re
 
 from .utils import (
+    error_to_compat_str,
     ExtractorError,
     js_to_json,
     remove_quotes,
@@ -130,7 +131,7 @@ _SC_OPERATORS = (
 _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
 
 _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
-_QUOTES = '\'"'
+_QUOTES = '\'"/'
 
 
 def _ternary(cndn, if_true=True, if_false=False):
@@ -155,6 +156,12 @@ class JS_Continue(ExtractorError):
         ExtractorError.__init__(self, 'Invalid continue')
 
 
+class JS_Throw(ExtractorError):
+    def __init__(self, e):
+        self.error = e
+        ExtractorError.__init__(self, 'Uncaught exception ' + error_to_compat_str(e))
+
+
 class LocalNameSpace(ChainMap):
     def __getitem__(self, key):
         try:
@@ -172,6 +179,17 @@ class LocalNameSpace(ChainMap):
     def __delitem__(self, key):
         raise NotImplementedError('Deleting is not supported')
 
+    # except
+    def pop(self, key, *args):
+        try:
+            off = self.__getitem__(key)
+            super(LocalNameSpace, self).__delitem__(key)
+            return off
+        except KeyError:
+            if len(args) > 0:
+                return args[0]
+            raise
+
     def __contains__(self, key):
         try:
             super(LocalNameSpace, self).__getitem__(key)
@@ -188,9 +206,29 @@ class JSInterpreter(object):
 
     undefined = _UNDEFINED
 
+    RE_FLAGS = {
+        # special knowledge: Python's re flags are bitmask values, current max 128
+        # invent new bitmask values well above that for literal parsing
+        # TODO: new pattern class to execute matches with these flags
+        'd': 1024,  # Generate indices for substring matches
+        'g': 2048,  # Global search
+        'i': re.I,  # Case-insensitive search
+        'm': re.M,  # Multi-line search
+        's': re.S,  # Allows . to match newline characters
+        'u': re.U,  # Treat a pattern as a sequence of unicode code points
+        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
+    }
+
+    _EXC_NAME = '__youtube_dl_exception__'
+    _OBJ_NAME = '__youtube_dl_jsinterp_obj'
+
+    OP_CHARS = None
+
     def __init__(self, code, objects=None):
         self.code, self._functions = code, {}
         self._objects = {} if objects is None else objects
+        if type(self).OP_CHARS is None:
+            type(self).OP_CHARS = self.OP_CHARS = self.__op_chars()
 
     class Exception(ExtractorError):
         def __init__(self, msg, *args, **kwargs):
@@ -199,32 +237,64 @@ class JSInterpreter(object):
                 msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
+    @classmethod
+    def __op_chars(cls):
+        op_chars = set(';,')
+        for op in cls._all_operators():
+            for c in op[0]:
+                op_chars.add(c)
+        return op_chars
+
     def _named_object(self, namespace, obj):
         self.__named_object_counter += 1
-        name = '__youtube_dl_jsinterp_obj%d' % (self.__named_object_counter, )
+        name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
         namespace[name] = obj
         return name
 
-    @staticmethod
-    def _separate(expr, delim=',', max_split=None, skip_delims=None):
+    @classmethod
+    def _regex_flags(cls, expr):
+        flags = 0
+        if not expr:
+            return flags, expr
+        for idx, ch in enumerate(expr):
+            if ch not in cls.RE_FLAGS:
+                break
+            flags |= cls.RE_FLAGS[ch]
+        return flags, expr[idx:] if idx > 0 else expr
+
+    @classmethod
+    def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
             return
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
-        start, splits, pos, skipping, delim_len = 0, 0, 0, 0, len(delim) - 1
-        in_quote, escaping = None, False
+        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
+        in_quote, escaping, skipping = None, False, 0
+        after_op, in_regex_char_group, skip_re = True, False, 0
+
         for idx, char in enumerate(expr):
+            if skip_re > 0:
+                skip_re -= 1
+                continue
             if not in_quote:
                 if char in _MATCHING_PARENS:
                     counters[_MATCHING_PARENS[char]] += 1
                 elif char in counters:
                     counters[char] -= 1
-            if not escaping:
-                if char in _QUOTES and in_quote in (char, None):
-                    in_quote = None if in_quote else char
-                else:
-                    escaping = in_quote and char == '\\'
-            else:
-                escaping = False
+            if not escaping and char in _QUOTES and in_quote in (char, None):
+                if in_quote or after_op or char != '/':
+                    in_quote = None if in_quote and not in_regex_char_group else char
+                    if in_quote is None and char == '/' and delim != '/':
+                        # regexp flags
+                        n_idx = idx + 1
+                        while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS:
+                            n_idx += 1
+                        skip_re = n_idx - idx - 1
+                        if skip_re > 0:
+                            continue
+            elif in_quote == '/' and char in '[]':
+                in_regex_char_group = char == '['
+            escaping = not escaping and in_quote and char == '\\'
+            after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op)
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -313,16 +383,23 @@ class JSInterpreter(object):
             if should_return:
                 return ret, should_return
 
-        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|$)', stmt)
+        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
         if m:
             expr = stmt[len(m.group(0)):].strip()
+            if m.group('throw'):
+                raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
             should_return = not m.group('var')
         if not expr:
             return None, should_return
 
         if expr[0] in _QUOTES:
             inner, outer = self._separate(expr, expr[0], 1)
-            inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
+            if expr[0] == '/':
+                flags, _ = self._regex_flags(outer)
+                inner, outer = inner.replace('"', r'\"'), ''
+                inner = re.compile(js_to_json(inner + expr[0]), flags=flags)  # , strict=True))
+            else:
+                inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
             if not outer:
                 return inner, should_return
             expr = self._named_object(local_vars, inner) + outer
@@ -374,22 +451,37 @@ class JSInterpreter(object):
                 for item in self._separate(inner)])
             expr = name + outer
 
-        m = re.match(r'(?P<try>try|finally)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+        m = re.match(r'''(?x)
+            (?P<try>try|finally)\s*|
+            (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
+            (?P<switch>switch)\s*\(|
+            (?P<for>for)\s*\(|'''.format(**globals()), expr)
         md = m.groupdict() if m else {}
         if md.get('try'):
             if expr[m.end()] == '{':
                 try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
             else:
                 try_expr, expr = expr[m.end() - 1:], ''
-            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
-            if should_abort:
-                return ret, True
+            try:
+                ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+            except JS_Throw as e:
+                local_vars[self._EXC_NAME] = e.error
+            except Exception as e:
+                # XXX: This works for now, but makes debugging future issues very hard
+                local_vars[self._EXC_NAME] = e
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
         elif md.get('catch'):
-            # We ignore the catch block
-            _, expr = self._separate_at_paren(expr, '}')
+            catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
+            if self._EXC_NAME in local_vars:
+                catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
+                ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
+                if should_abort:
+                    return ret, True
+
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
@@ -503,7 +595,7 @@ class JSInterpreter(object):
                 raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
             idx = int(idx)
             left_val[idx] = self._operator(
-                m.group('op'), left_val[idx], m.group('expr'), expr, local_vars, allow_recursion)
+                m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
             return left_val[idx], should_return
 
         elif expr.isdigit():

From 46b8ae2f520c17aaa756082676788c6287b6809e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 Aug 2022 15:34:33 +0100
Subject: [PATCH 106/312] [jsinterp] Clean up and pull yt-dlp style * add
 compat_re_Pattern * improve compat_collections_chain_map * use class
 JS_Undefined * remove unused code

---
 test/test_jsinterp.py          |  20 +++---
 test/test_youtube_signature.py |   3 +-
 youtube_dl/compat.py           |  21 +++++-
 youtube_dl/jsinterp.py         | 123 ++++++++++++---------------------
 4 files changed, 77 insertions(+), 90 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index faddf00d5..96786a84c 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -11,8 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.jsinterp import JSInterpreter
-undefined = JSInterpreter.undefined
+from youtube_dl.compat import compat_re_Pattern
+
+from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
 
 
 class TestJSInterpreter(unittest.TestCase):
@@ -261,12 +262,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { return undefined; }
         ''')
-        self.assertIs(jsi.call_function('x'), undefined)
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
 
         jsi = JSInterpreter('''
         function x() { let v; return v; }
         ''')
-        self.assertIs(jsi.call_function('x'), undefined)
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
 
         jsi = JSInterpreter('''
         function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
@@ -307,7 +308,7 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
         ''')
-        self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined])
+        self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
 
         jsi = JSInterpreter('function x(){return undefined ?? 42; }')
         self.assertEqual(jsi.call_function('x'), 42)
@@ -326,12 +327,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a; return a?.qq; }
         ''')
-        self.assertIs(jsi.call_function('x'), undefined)
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
 
         jsi = JSInterpreter('''
         function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
         ''')
-        self.assertIs(jsi.call_function('x'), undefined)
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
 
     def test_regex(self):
         jsi = JSInterpreter('''
@@ -342,13 +343,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; return a; }
         ''')
-        # Pythons disagree on the type of a pattern
-        self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile(''))))
+        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
         ''')
-        self.assertEqual(jsi.call_function('x').flags & re.I, re.I)
+        self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
 
 if __name__ == '__main__':
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 43e22388d..327d4c40d 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -12,10 +12,11 @@ import io
 import re
 import string
 
+from youtube_dl.compat import compat_str, compat_urlretrieve
+
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.jsinterp import JSInterpreter
-from youtube_dl.compat import compat_str, compat_urlretrieve
 
 _SIG_TESTS = [
     (
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 6d2c31a61..3002109ca 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3023,18 +3023,34 @@ except ImportError:
             self.maps[0].__setitem__(k, v)
             return
 
-        def __delitem__(self, k):
+        def __contains__(self, k):
+            return any((k in m) for m in self.maps)
+
+        def __delitem(self, k):
             if k in self.maps[0]:
                 del self.maps[0][k]
                 return
             raise KeyError(k)
 
+        def __delitem__(self, k):
+            self.__delitem(k)
+
         def __iter__(self):
             return itertools.chain(*reversed(self.maps))
 
         def __len__(self):
             return len(iter(self))
 
+        # to match Py3, don't del directly
+        def pop(self, k, *args):
+            if self.__contains__(k):
+                off = self.__getitem__(k)
+                self.__delitem(k)
+                return off
+            elif len(args) > 0:
+                return args[0]
+            raise KeyError(k)
+
         def new_child(self, m=None, **kwargs):
             m = m or {}
             m.update(kwargs)
@@ -3044,6 +3060,8 @@ except ImportError:
         def parents(self):
             return compat_collections_chain_map(*(self.maps[1:]))
 
+# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
+compat_re_Pattern = type(re.compile(''))
 
 if sys.version_info < (3, 3):
     def compat_b64decode(s, *args, **kwargs):
@@ -3110,6 +3128,7 @@ __all__ = [
     'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_re_Pattern',
     'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 48c27a1c0..6719d0dfd 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -19,16 +19,12 @@ from .compat import (
     compat_str,
 )
 
-_NAME_RE = r'[a-zA-Z_$][\w$]*'
-
-_UNDEFINED = object()
-
 
 def _js_bit_op(op):
 
     def wrapped(a, b):
         def zeroise(x):
-            return 0 if x in (None, _UNDEFINED) else x
+            return 0 if x in (None, JS_Undefined) else x
         return op(zeroise(a), zeroise(b))
 
     return wrapped
@@ -37,7 +33,7 @@ def _js_bit_op(op):
 def _js_arith_op(op):
 
     def wrapped(a, b):
-        if _UNDEFINED in (a, b):
+        if JS_Undefined in (a, b):
             return float('nan')
         return op(a or 0, b or 0)
 
@@ -45,22 +41,21 @@ def _js_arith_op(op):
 
 
 def _js_div(a, b):
-    if _UNDEFINED in (a, b) or not (a and b):
+    if JS_Undefined in (a, b) or not (a and b):
         return float('nan')
     return float('inf') if not b else operator.truediv(a or 0, b)
 
 
 def _js_mod(a, b):
-    if _UNDEFINED in (a, b) or not b:
+    if JS_Undefined in (a, b) or not b:
         return float('nan')
     return (a or 0) % b
 
 
 def _js_exp(a, b):
     if not b:
-        # even 0 ** 0 !!
-        return 1
-    if _UNDEFINED in (a, b):
+        return 1  # even 0 ** 0 !!
+    elif JS_Undefined in (a, b):
         return float('nan')
     return (a or 0) ** b
 
@@ -68,7 +63,7 @@ def _js_exp(a, b):
 def _js_eq_op(op):
 
     def wrapped(a, b):
-        if set((a, b)) <= set((None, _UNDEFINED)):
+        if set((a, b)) <= set((None, JS_Undefined)):
             return op(a, a)
         return op(a, b)
 
@@ -78,21 +73,28 @@ def _js_eq_op(op):
 def _js_comp_op(op):
 
     def wrapped(a, b):
-        if _UNDEFINED in (a, b):
+        if JS_Undefined in (a, b):
             return False
         return op(a or 0, b or 0)
 
     return wrapped
 
 
+def _js_ternary(cndn, if_true=True, if_false=False):
+    """Simulate JS's ternary operator (cndn?if_true:if_false)"""
+    if cndn in (False, None, 0, '', JS_Undefined):
+        return if_false
+    try:
+        if math.isnan(cndn):  # NB: NaN cannot be checked by membership
+            return if_false
+    except TypeError:
+        pass
+    return if_true
+
+
 # (op, definition) in order of binding priority, tightest first
 # avoid dict to maintain order
 # definition None => Defined in JSInterpreter._operator
-_DOT_OPERATORS = (
-    ('.', None),
-    # TODO: ('?.', None),
-)
-
 _OPERATORS = (
     ('>>', _js_bit_op(operator.rshift)),
     ('<<', _js_bit_op(operator.lshift)),
@@ -130,20 +132,13 @@ _SC_OPERATORS = (
 
 _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
 
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
 _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
 _QUOTES = '\'"/'
 
 
-def _ternary(cndn, if_true=True, if_false=False):
-    """Simulate JS's ternary operator (cndn?if_true:if_false)"""
-    if cndn in (False, None, 0, '', _UNDEFINED):
-        return if_false
-    try:
-        if math.isnan(cndn):  # NB: NaN cannot be checked by membership
-            return if_false
-    except TypeError:
-        pass
-    return if_true
+class JS_Undefined(object):
+    pass
 
 
 class JS_Break(ExtractorError):
@@ -167,7 +162,7 @@ class LocalNameSpace(ChainMap):
         try:
             return super(LocalNameSpace, self).__getitem__(key)
         except KeyError:
-            return _UNDEFINED
+            return JS_Undefined
 
     def __setitem__(self, key, value):
         for scope in self.maps:
@@ -179,24 +174,6 @@ class LocalNameSpace(ChainMap):
     def __delitem__(self, key):
         raise NotImplementedError('Deleting is not supported')
 
-    # except
-    def pop(self, key, *args):
-        try:
-            off = self.__getitem__(key)
-            super(LocalNameSpace, self).__delitem__(key)
-            return off
-        except KeyError:
-            if len(args) > 0:
-                return args[0]
-            raise
-
-    def __contains__(self, key):
-        try:
-            super(LocalNameSpace, self).__getitem__(key)
-            return True
-        except KeyError:
-            return False
-
     def __repr__(self):
         return 'LocalNameSpace%s' % (self.maps, )
 
@@ -204,9 +181,7 @@ class LocalNameSpace(ChainMap):
 class JSInterpreter(object):
     __named_object_counter = 0
 
-    undefined = _UNDEFINED
-
-    RE_FLAGS = {
+    _RE_FLAGS = {
         # special knowledge: Python's re flags are bitmask values, current max 128
         # invent new bitmask values well above that for literal parsing
         # TODO: new pattern class to execute matches with these flags
@@ -257,10 +232,10 @@ class JSInterpreter(object):
         if not expr:
             return flags, expr
         for idx, ch in enumerate(expr):
-            if ch not in cls.RE_FLAGS:
+            if ch not in cls._RE_FLAGS:
                 break
-            flags |= cls.RE_FLAGS[ch]
-        return flags, expr[idx:] if idx > 0 else expr
+            flags |= cls._RE_FLAGS[ch]
+        return flags, expr[idx + 1:]
 
     @classmethod
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
@@ -283,14 +258,6 @@ class JSInterpreter(object):
             if not escaping and char in _QUOTES and in_quote in (char, None):
                 if in_quote or after_op or char != '/':
                     in_quote = None if in_quote and not in_regex_char_group else char
-                    if in_quote is None and char == '/' and delim != '/':
-                        # regexp flags
-                        n_idx = idx + 1
-                        while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS:
-                            n_idx += 1
-                        skip_re = n_idx - idx - 1
-                        if skip_re > 0:
-                            continue
             elif in_quote == '/' and char in '[]':
                 in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
@@ -336,13 +303,13 @@ class JSInterpreter(object):
 
     def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
         if op in ('||', '&&'):
-            if (op == '&&') ^ _ternary(left_val):
+            if (op == '&&') ^ _js_ternary(left_val):
                 return left_val  # short circuiting
         elif op == '??':
-            if left_val not in (None, self.undefined):
+            if left_val not in (None, JS_Undefined):
                 return left_val
         elif op == '?':
-            right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1))
+            right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
 
         right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
         opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
@@ -361,7 +328,7 @@ class JSInterpreter(object):
             return obj[int(idx)] if isinstance(obj, list) else obj[idx]
         except Exception as e:
             if allow_undefined:
-                return self.undefined
+                return JS_Undefined
             raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
@@ -395,9 +362,8 @@ class JSInterpreter(object):
         if expr[0] in _QUOTES:
             inner, outer = self._separate(expr, expr[0], 1)
             if expr[0] == '/':
-                flags, _ = self._regex_flags(outer)
-                inner, outer = inner.replace('"', r'\"'), ''
-                inner = re.compile(js_to_json(inner + expr[0]), flags=flags)  # , strict=True))
+                flags, outer = self._regex_flags(outer)
+                inner = re.compile(inner[1:], flags=flags)  # , strict=True))
             else:
                 inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
             if not outer:
@@ -422,7 +388,7 @@ class JSInterpreter(object):
 
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr, '}')
-            # try for object expression
+            # try for object expression (Map)
             sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
             if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
                 return dict(
@@ -455,7 +421,8 @@ class JSInterpreter(object):
             (?P<try>try|finally)\s*|
             (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
             (?P<switch>switch)\s*\(|
-            (?P<for>for)\s*\(|'''.format(**globals()), expr)
+            (?P<for>for)\s*\(|
+            '''.format(**globals()), expr)
         md = m.groupdict() if m else {}
         if md.get('try'):
             if expr[m.end()] == '{':
@@ -500,7 +467,7 @@ class JSInterpreter(object):
             start, cndn, increment = self._separate(constructor, ';')
             self.interpret_expression(start, local_vars, allow_recursion)
             while True:
-                if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
+                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                     break
                 try:
                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
@@ -587,7 +554,7 @@ class JSInterpreter(object):
                 local_vars[m.group('out')] = self._operator(
                     m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
                 return local_vars[m.group('out')], should_return
-            elif left_val in (None, self.undefined):
+            elif left_val in (None, JS_Undefined):
                 raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
 
             idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
@@ -607,7 +574,7 @@ class JSInterpreter(object):
             raise JS_Continue()
 
         elif expr == 'undefined':
-            return self.undefined, should_return
+            return JS_Undefined, should_return
 
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -663,9 +630,9 @@ class JSInterpreter(object):
                     'Math': float,
                 }
                 obj = local_vars.get(variable)
-                if obj in (self.undefined, None):
-                    obj = types.get(variable, self.undefined)
-                if obj is self.undefined:
+                if obj in (JS_Undefined, None):
+                    obj = types.get(variable, JS_Undefined)
+                if obj is JS_Undefined:
                     try:
                         if variable not in self._objects:
                             self._objects[variable] = self.extract_object(variable)
@@ -674,8 +641,8 @@ class JSInterpreter(object):
                         if not nullish:
                             raise
 
-                if nullish and obj is self.undefined:
-                    return self.undefined
+                if nullish and obj is JS_Undefined:
+                    return JS_Undefined
 
                 # Member access
                 if arg_str is None:

From fd3f3bebd0699f4b782a24a503093c965c4f4f5e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 Aug 2022 19:11:08 +0100
Subject: [PATCH 107/312] [uktvplay] Support domain without .uktv

---
 youtube_dl/extractor/uktvplay.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dl/extractor/uktvplay.py
index f28fd514d..9ef9638cd 100644
--- a/youtube_dl/extractor/uktvplay.py
+++ b/youtube_dl/extractor/uktvplay.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class UKTVPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
+    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
         'info_dict': {

From a8d5316aaf3dc740aad486b8c394b2f3e70f5a58 Mon Sep 17 00:00:00 2001
From: gudata <gudata@users.noreply.github.com>
Date: Fri, 19 Aug 2022 23:00:21 +0300
Subject: [PATCH 108/312] [infoq] Avoid crash if the page has no `mp3Form`

* proposed fix for issue #31131, aligns with yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/infoq.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 0a70a1fb4..60b02b699 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -1,6 +1,9 @@
 # coding: utf-8
 
 from __future__ import unicode_literals
+from ..utils import (
+    ExtractorError,
+)
 
 from ..compat import (
     compat_b64decode,
@@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE):
         }]
 
     def _extract_http_audio(self, webpage, video_id):
-        fields = self._form_hidden_inputs('mp3Form', webpage)
+        try:
+            fields = self._form_hidden_inputs('mp3Form', webpage)
+        except ExtractorError:
+            fields = {}
+
         http_audio_url = fields.get('filename')
         if not http_audio_url:
             return []

From 556862bc911bb54435b7b0b01451789b884b0390 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 21 Aug 2022 00:19:19 +0100
Subject: [PATCH 109/312] [utils] Ensure RFC3986 encoding result is unicode

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a5f584ec5..fea38ed32 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3970,7 +3970,8 @@ def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
     if sys.version_info < (3, 0) and isinstance(s, compat_str):
         s = s.encode('utf-8')
-    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+    # ensure unicode: after quoting, it can always be converted
+    return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
 
 
 def escape_url(url):

From 66e58dccc29de65cc95ee97915987d785b2b4b31 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 21 Aug 2022 00:21:02 +0100
Subject: [PATCH 110/312] [core] Avoid processing empty format list after
 removing bad formats * also ensure compat encoding of error strings

---
 youtube_dl/YoutubeDL.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e77b8d50c..8e8546596 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -721,7 +721,7 @@ class YoutubeDL(object):
                 filename = encodeFilename(filename, True).decode(preferredencoding())
             return sanitize_path(filename)
         except ValueError as err:
-            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None
 
     def _match_entry(self, info_dict, incomplete):
@@ -1570,9 +1570,6 @@ class YoutubeDL(object):
         else:
             formats = info_dict['formats']
 
-        if not formats:
-            raise ExtractorError('No video formats found!')
-
         def is_wellformed(f):
             url = f.get('url')
             if not url:
@@ -1585,7 +1582,10 @@ class YoutubeDL(object):
             return True
 
         # Filter out malformed formats for better extraction robustness
-        formats = list(filter(is_wellformed, formats))
+        formats = list(filter(is_wellformed, formats or []))
+
+        if not formats:
+            raise ExtractorError('No video formats found!')
 
         formats_dict = {}
 
@@ -2058,7 +2058,7 @@ class YoutubeDL(object):
                 try:
                     self.post_process(filename, info_dict)
                 except (PostProcessingError) as err:
-                    self.report_error('postprocessing: %s' % str(err))
+                    self.report_error('postprocessing: %s' % error_to_compat_str(err))
                     return
                 self.record_download_archive(info_dict)
                 # avoid possible nugatory search for further items (PR #26638)

From 573b13410e5c2f939676116e2700ec8efd9cf97b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 25 Aug 2022 12:14:59 +0100
Subject: [PATCH 111/312] [YouTube] Improve error check for n-sig processing

---
 youtube_dl/extractor/youtube.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 91a3b6058..3d12e2e4a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1500,7 +1500,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return lambda s: jsi.extract_function_from_code(*func_code)([s])
 
     def _n_descramble(self, n_param, player_url, video_id):
-        """Compute the response to YT's "n" parameter challenge
+        """Compute the response to YT's "n" parameter challenge,
+           or None
 
         Args:
         n_param     -- challenge string that is the value of the
@@ -1518,7 +1519,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if player_id not in self._player_cache:
                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
             func = self._player_cache[player_id]
-            self._player_cache[sig_id] = func(n_param)
+            ret = func(n_param)
+            if ret.startswith('enhanced_except_'):
+                raise ExtractorError('Unhandled exception in decode')
+            self._player_cache[sig_id] = ret
             if self._downloader.params.get('verbose', False):
                 self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
             return self._player_cache[sig_id]
@@ -1539,10 +1543,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 continue
             n_param = n_param[-1]
             n_response = self._n_descramble(n_param, player_url, video_id)
-            if n_response:
-                qs['n'] = [n_response]
-                fmt['url'] = compat_urlparse.urlunparse(
-                    parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+            if n_response is None:
+                # give up if descrambling failed
+                break
+            qs['n'] = [n_response]
+            fmt['url'] = compat_urlparse.urlunparse(
+                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
     def _mark_watched(self, video_id, player_response):
         playback_url = url_or_none(try_get(

From d619dd712f63aab1964f8fdde9ceea514a5e581d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 25 Aug 2022 12:16:10 +0100
Subject: [PATCH 112/312] [jsinterp] Fix bug in operator precedence * from
 https://github.com/yt-dlp/yt-dlp/commit/164b03c4864b0d44cfee5e7702f7c2317164a6cf
 * added tests

---
 test/test_jsinterp.py          | 25 +++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 ++++
 youtube_dl/jsinterp.py         |  7 ++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 96786a84c..0a97bdbc4 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -192,6 +192,31 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
+    def test_catch(self):
+        jsi = JSInterpreter('''
+        function x() { try{throw 10} catch(e){return 5} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+
+    @unittest.expectedFailure
+    def test_finally(self):
+        jsi = JSInterpreter('''
+        function x() { try{throw 10} finally {return 42} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+        jsi = JSInterpreter('''
+        function x() { try{throw 10} catch(e){return 5} finally {return 42} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+
+    def test_nested_try(self):
+        jsi = JSInterpreter('''
+        function x() {try {
+            try{throw 10} finally {throw 42} 
+            } catch(e){return 5} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+
     def test_for_loop_continue(self):
         jsi = JSInterpreter('''
         function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 327d4c40d..4bb0a30b0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -111,6 +111,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
         'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
     ),
+    (
+        'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
+        '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 6719d0dfd..a8456ec1c 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -5,6 +5,7 @@ import json
 import math
 import operator
 import re
+from collections import Counter
 
 from .utils import (
     error_to_compat_str,
@@ -108,8 +109,8 @@ _OPERATORS = (
 
 _COMP_OPERATORS = (
     ('===', operator.is_),
-    ('==', _js_eq_op(operator.eq)),
     ('!==', operator.is_not),
+    ('==', _js_eq_op(operator.eq)),
     ('!=', _js_eq_op(operator.ne)),
     ('<=', _js_comp_op(operator.le)),
     ('>=', _js_comp_op(operator.ge)),
@@ -241,7 +242,9 @@ class JSInterpreter(object):
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
             return
+        # collections.Counter() is ~10% slower
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
+        # counters = Counter()
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
         after_op, in_regex_char_group, skip_re = True, False, 0
@@ -442,6 +445,7 @@ class JSInterpreter(object):
             return ret, should_abort or should_return
 
         elif md.get('catch'):
+
             catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
             if self._EXC_NAME in local_vars:
                 catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
@@ -450,6 +454,7 @@ class JSInterpreter(object):
                     return ret, True
 
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+
             return ret, should_abort or should_return
 
         elif md.get('for'):

From 4c6fba37650d60acbd32a9f2d6e2468a730d0f1c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 26 Aug 2022 08:17:54 +0100
Subject: [PATCH 113/312] [jsinterp] Improve try/catch/finally support

---
 test/test_jsinterp.py  | 14 ++++++-
 youtube_dl/jsinterp.py | 88 +++++++++++++++++++++++-------------------
 2 files changed, 61 insertions(+), 41 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 0a97bdbc4..fb4882d00 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -74,6 +74,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return 0 ?? 42;}')
         self.assertEqual(jsi.call_function('f'), 0)
 
+        jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
+        self.assertFalse(jsi.call_function('f'))
+
     def test_array_access(self):
         jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -198,7 +201,6 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 5)
 
-    @unittest.expectedFailure
     def test_finally(self):
         jsi = JSInterpreter('''
         function x() { try{throw 10} finally {return 42} }
@@ -212,7 +214,7 @@ class TestJSInterpreter(unittest.TestCase):
     def test_nested_try(self):
         jsi = JSInterpreter('''
         function x() {try {
-            try{throw 10} finally {throw 42} 
+            try{throw 10} finally {throw 42}
             } catch(e){return 5} }
         ''')
         self.assertEqual(jsi.call_function('x'), 5)
@@ -229,6 +231,14 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 0)
 
+    def test_for_loop_try(self):
+        jsi = JSInterpreter('''
+        function x() {
+            for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+            return 42 }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_literal_list(self):
         jsi = JSInterpreter('''
         function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a8456ec1c..08726e478 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -5,7 +5,6 @@ import json
 import math
 import operator
 import re
-from collections import Counter
 
 from .utils import (
     error_to_compat_str,
@@ -15,6 +14,7 @@ from .utils import (
     unified_timestamp,
 )
 from .compat import (
+    compat_basestring,
     compat_collections_chain_map as ChainMap,
     compat_itertools_zip_longest as zip_longest,
     compat_str,
@@ -76,6 +76,10 @@ def _js_comp_op(op):
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return False
+        if isinstance(a, compat_basestring):
+            b = compat_str(b or 0)
+        elif isinstance(b, compat_basestring):
+            a = compat_str(a or 0)
         return op(a or 0, b or 0)
 
     return wrapped
@@ -195,7 +199,6 @@ class JSInterpreter(object):
         'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
     }
 
-    _EXC_NAME = '__youtube_dl_exception__'
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -242,9 +245,8 @@ class JSInterpreter(object):
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
             return
-        # collections.Counter() is ~10% slower
+        # collections.Counter() is ~10% slower in both 2.7 and 3.9
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
-        # counters = Counter()
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
         after_op, in_regex_char_group, skip_re = True, False, 0
@@ -291,7 +293,9 @@ class JSInterpreter(object):
         yield expr[start:]
 
     @classmethod
-    def _separate_at_paren(cls, expr, delim):
+    def _separate_at_paren(cls, expr, delim=None):
+        if delim is None:
+            delim = expr and _MATCHING_PARENS[expr[0]]
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
@@ -376,7 +380,7 @@ class JSInterpreter(object):
         if expr.startswith('new '):
             obj = expr[4:]
             if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:], ')')
+                left, right = self._separate_at_paren(obj[4:])
                 expr = unified_timestamp(
                     self.interpret_expression(left, local_vars, allow_recursion), False)
                 if not expr:
@@ -390,7 +394,7 @@ class JSInterpreter(object):
             return None, should_return
 
         if expr.startswith('{'):
-            inner, outer = self._separate_at_paren(expr, '}')
+            inner, outer = self._separate_at_paren(expr)
             # try for object expression (Map)
             sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
             if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
@@ -406,7 +410,7 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('('):
-            inner, outer = self._separate_at_paren(expr, ')')
+            inner, outer = self._separate_at_paren(expr)
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
@@ -414,57 +418,63 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('['):
-            inner, outer = self._separate_at_paren(expr, ']')
+            inner, outer = self._separate_at_paren(expr)
             name = self._named_object(local_vars, [
                 self.interpret_expression(item, local_vars, allow_recursion)
                 for item in self._separate(inner)])
             expr = name + outer
 
         m = re.match(r'''(?x)
-            (?P<try>try|finally)\s*|
-            (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
-            (?P<switch>switch)\s*\(|
-            (?P<for>for)\s*\(|
-            '''.format(**globals()), expr)
+                (?P<try>try)\s*\{|
+                (?P<switch>switch)\s*\(|
+                (?P<for>for)\s*\(
+                ''', expr)
         md = m.groupdict() if m else {}
         if md.get('try'):
-            if expr[m.end()] == '{':
-                try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
-            else:
-                try_expr, expr = expr[m.end() - 1:], ''
+            try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+            err = None
             try:
                 ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
                 if should_abort:
                     return ret, True
-            except JS_Throw as e:
-                local_vars[self._EXC_NAME] = e.error
             except Exception as e:
                 # XXX: This works for now, but makes debugging future issues very hard
-                local_vars[self._EXC_NAME] = e
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
-            return ret, should_abort or should_return
+                err = e
 
-        elif md.get('catch'):
+            pending = (None, False)
+            m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+            if m:
+                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                if err:
+                    catch_vars = {}
+                    if m.group('err'):
+                        catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
+                    catch_vars = local_vars.new_child(m=catch_vars)
+                    err = None
+                    pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
 
-            catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
-            if self._EXC_NAME in local_vars:
-                catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
-                ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
+            m = re.match(r'finally\s*\{', expr)
+            if m:
+                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
                 if should_abort:
                     return ret, True
 
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            ret, should_abort = pending
+            if should_abort:
+                return ret, True
 
-            return ret, should_abort or should_return
+            if err:
+                raise err
 
         elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
-                body, expr = self._separate_at_paren(remaining, '}')
+                body, expr = self._separate_at_paren(remaining)
             else:
                 switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
                 if switch_m:
-                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
+                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                     body, expr = self._separate_at_paren(remaining, '}')
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
@@ -483,11 +493,9 @@ class JSInterpreter(object):
                 except JS_Continue:
                     pass
                 self.interpret_expression(increment, local_vars, allow_recursion)
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
-            return ret, should_abort or should_return
 
         elif md.get('switch'):
-            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
             switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
             body, expr = self._separate_at_paren(remaining, '}')
             items = body.replace('default:', 'case default:').split('case ')[1:]
@@ -510,6 +518,8 @@ class JSInterpreter(object):
                         break
                 if matched:
                     break
+
+        if md:
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
@@ -618,7 +628,7 @@ class JSInterpreter(object):
                 member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
             arg_str = expr[m.end():]
             if arg_str.startswith('('):
-                arg_str, remaining = self._separate_at_paren(arg_str, ')')
+                arg_str, remaining = self._separate_at_paren(arg_str)
             else:
                 arg_str, remaining = None, arg_str
 
@@ -795,7 +805,7 @@ class JSInterpreter(object):
                 \((?P<args>[^)]*)\)\s*
                 (?P<code>{.+})''' % {'name': re.escape(funcname)},
             self.code)
-        code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
+        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         if func_m is None:
             raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
         return self.build_arglist(func_m.group('args')), code
@@ -810,7 +820,7 @@ class JSInterpreter(object):
             if mobj is None:
                 break
             start, body_start = mobj.span()
-            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+            body, remaining = self._separate_at_paren(code[body_start - 1:])
             name = self._named_object(
                 local_vars,
                 self.extract_function_from_code(

From 0f6422590e44e99e9b81cf2367666efe89fae3aa Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 26 Aug 2022 10:17:56 +0100
Subject: [PATCH 114/312] [compat] Replace deficient ChainMap class in Py3.3
 and earlier

---
 youtube_dl/compat.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 3002109ca..366a93924 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3004,8 +3004,11 @@ except ImportError:
 # new class in collections
 try:
     from collections import ChainMap as compat_collections_chain_map
+    # Py3.3's ChainMap is deficient
+    if sys.version_info <= (3, 3):
+        raise ImportError
 except ImportError:
-    # Py < 3.3
+    # Py <= 3.3
     class compat_collections_chain_map(compat_collections_abc.MutableMapping):
 
         maps = [{}]
@@ -3060,6 +3063,7 @@ except ImportError:
         def parents(self):
             return compat_collections_chain_map(*(self.maps[1:]))
 
+
 # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
 compat_re_Pattern = type(re.compile(''))
 

From ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 26 Aug 2022 12:22:01 +0100
Subject: [PATCH 115/312] [compat] Replace deficient ChainMap class in Py3.3
 and earlier * fix version check

---
 youtube_dl/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 366a93924..eca6d63de 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3005,7 +3005,7 @@ except ImportError:
 try:
     from collections import ChainMap as compat_collections_chain_map
     # Py3.3's ChainMap is deficient
-    if sys.version_info <= (3, 3):
+    if sys.version_info < (3, 4):
         raise ImportError
 except ImportError:
     # Py <= 3.3

From 4050e10a4c3445c5399239567eb074acb2f65c18 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 29 Aug 2022 13:02:17 +0100
Subject: [PATCH 116/312] [options] Document that postprocessing is not forced
 by --postprocessor-args

Resolves #30307
---
 youtube_dl/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f6621ef91..f6d2b0898 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -801,7 +801,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--postprocessor-args',
         dest='postprocessor_args', metavar='ARGS',
-        help='Give these arguments to the postprocessor')
+        help='Give these arguments to the postprocessor (if postprocessing is required)')
     postproc.add_option(
         '-k', '--keep-video',
         action='store_true', dest='keepvideo', default=False,

From 55c823634db890a328ffc23588fcd6f35d9b3ddf Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 31 Aug 2022 23:22:48 +0100
Subject: [PATCH 117/312] [jsinterp] Handle new YT players 113ca41c, c57c113c *
 add NaN * allow any white-space character for `after_op` * align with yt-dlp
 f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charcodeAt and bitwise overflow) *
 allow escaping in regex, fixing player c57c113c

---
 test/test_jsinterp.py          | 21 ++++++++++++++++
 test/test_youtube_signature.py | 16 ++++++++++++
 youtube_dl/jsinterp.py         | 46 +++++++++++++++++++++-------------
 3 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index fb4882d00..5121c8cf8 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -135,6 +135,11 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
 
     def test_builtins(self):
+        jsi = JSInterpreter('''
+        function x() { return NaN }
+        ''')
+        self.assertTrue(math.isnan(jsi.call_function('x')))
+
         jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
@@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+    def test_char_code_at(self):
+        jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
+        self.assertEqual(jsi.call_function('x', 0), 116)
+        self.assertEqual(jsi.call_function('x', 1), 101)
+        self.assertEqual(jsi.call_function('x', 2), 115)
+        self.assertEqual(jsi.call_function('x', 3), 116)
+        self.assertEqual(jsi.call_function('x', 4), None)
+        self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
+
+    def test_bitwise_operators_overflow(self):
+        jsi = JSInterpreter('function x(){return -524999584 << 5}')
+        self.assertEqual(jsi.call_function('x'), 379882496)
+
+        jsi = JSInterpreter('function x(){return 1236566549 << 5}')
+        self.assertEqual(jsi.call_function('x'), 915423904)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4bb0a30b0..ec914a871 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -111,10 +111,26 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
         'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
     ),
+    (
+        'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
+        '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
+    ),
     (
         'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
         '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
     ),
+    (
+        'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
+        '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
+    ),
+    (
+        'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
+        'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
+    ),
+    (
+        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
+        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 08726e478..d13329396 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -23,10 +23,11 @@ from .compat import (
 
 def _js_bit_op(op):
 
+    def zeroise(x):
+        return 0 if x in (None, JS_Undefined) else x
+
     def wrapped(a, b):
-        def zeroise(x):
-            return 0 if x in (None, JS_Undefined) else x
-        return op(zeroise(a), zeroise(b))
+        return op(zeroise(a), zeroise(b)) & 0xffffffff
 
     return wrapped
 
@@ -44,7 +45,7 @@ def _js_arith_op(op):
 def _js_div(a, b):
     if JS_Undefined in (a, b) or not (a and b):
         return float('nan')
-    return float('inf') if not b else operator.truediv(a or 0, b)
+    return operator.truediv(a or 0, b) if b else float('inf')
 
 
 def _js_mod(a, b):
@@ -260,13 +261,14 @@ class JSInterpreter(object):
                     counters[_MATCHING_PARENS[char]] += 1
                 elif char in counters:
                     counters[char] -= 1
-            if not escaping and char in _QUOTES and in_quote in (char, None):
-                if in_quote or after_op or char != '/':
-                    in_quote = None if in_quote and not in_regex_char_group else char
-            elif in_quote == '/' and char in '[]':
-                in_regex_char_group = char == '['
+            if not escaping:
+                if char in _QUOTES and in_quote in (char, None):
+                    if in_quote or after_op or char != '/':
+                        in_quote = None if in_quote and not in_regex_char_group else char
+                elif in_quote == '/' and char in '[]':
+                    in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op)
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -590,6 +592,8 @@ class JSInterpreter(object):
 
         elif expr == 'undefined':
             return JS_Undefined, should_return
+        elif expr == 'NaN':
+            return float('NaN'), should_return
 
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -635,7 +639,8 @@ class JSInterpreter(object):
             def assertion(cndn, msg):
                 """ assert, but without risk of getting optimized out """
                 if not cndn:
-                    raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr)
+                    memb = member
+                    raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
 
             def eval_method():
                 if (variable, member) == ('console', 'debug'):
@@ -737,6 +742,13 @@ class JSInterpreter(object):
                         return obj.index(idx, start)
                     except ValueError:
                         return -1
+                elif member == 'charCodeAt':
+                    assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                    # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
+                    idx = argvals[0] if isinstance(argvals[0], int) else 0
+                    if idx >= len(obj):
+                        return None
+                    return ord(obj[idx])
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -820,12 +832,10 @@ class JSInterpreter(object):
             if mobj is None:
                 break
             start, body_start = mobj.span()
-            body, remaining = self._separate_at_paren(code[body_start - 1:])
-            name = self._named_object(
-                local_vars,
-                self.extract_function_from_code(
-                    self.build_arglist(mobj.group('args')),
-                    body, local_vars, *global_stack))
+            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+            name = self._named_object(local_vars, self.extract_function_from_code(
+                [x.strip() for x in mobj.group('args').split(',')],
+                body, local_vars, *global_stack))
             code = code[:start] + name + remaining
         return self.build_function(argnames, code, local_vars, *global_stack)
 
@@ -854,7 +864,7 @@ class JSInterpreter(object):
                 zip_longest(argnames, args, fillvalue=None))
             global_stack[0].update(kwargs)
             var_stack = LocalNameSpace(*global_stack)
-            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
+            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
             if should_abort:
                 return ret
         return resf

From 218c423bc042674a8834ffc09520a94fbbe7b138 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 1 Sep 2022 13:28:30 +0100
Subject: [PATCH 118/312] [cache] Add cache validation by program version,
 based on yt-dlp

---
 test/test_cache.py  | 16 ++++++++++++++--
 youtube_dl/cache.py | 28 +++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/test/test_cache.py b/test/test_cache.py
index a16160142..931074aa1 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -3,17 +3,18 @@
 
 from __future__ import unicode_literals
 
-import shutil
-
 # Allow direct execution
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import shutil
 
 from test.helper import FakeYDL
 from youtube_dl.cache import Cache
+from youtube_dl.utils import version_tuple
+from youtube_dl.version import __version__
 
 
 def _is_empty(d):
@@ -54,6 +55,17 @@ class TestCache(unittest.TestCase):
         self.assertFalse(os.path.exists(self.test_dir))
         self.assertEqual(c.load('test_cache', 'k.'), None)
 
+    def test_cache_validation(self):
+        ydl = FakeYDL({
+            'cachedir': self.test_dir,
+        })
+        c = Cache(ydl)
+        obj = {'x': 1, 'y': ['ä', '\\a', True]}
+        c.store('test_cache', 'k.', obj)
+        self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
+        new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
+        self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 7bdade1bd..4822439d0 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -10,12 +10,21 @@ import traceback
 
 from .compat import compat_getenv
 from .utils import (
+    error_to_compat_str,
     expand_path,
+    is_outdated_version,
+    try_get,
     write_json_file,
 )
+from .version import __version__
 
 
 class Cache(object):
+
+    _YTDL_DIR = 'youtube-dl'
+    _VERSION_KEY = _YTDL_DIR + '_version'
+    _DEFAULT_VERSION = '2021.12.17'
+
     def __init__(self, ydl):
         self._ydl = ydl
 
@@ -23,7 +32,7 @@ class Cache(object):
         res = self._ydl.params.get('cachedir')
         if res is None:
             cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
-            res = os.path.join(cache_root, 'youtube-dl')
+            res = os.path.join(cache_root, self._YTDL_DIR)
         return expand_path(res)
 
     def _get_cache_fn(self, section, key, dtype):
@@ -50,13 +59,22 @@ class Cache(object):
             except OSError as ose:
                 if ose.errno != errno.EEXIST:
                     raise
-            write_json_file(data, fn)
+            write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
         except Exception:
             tb = traceback.format_exc()
             self._ydl.report_warning(
                 'Writing cache to %r failed: %s' % (fn, tb))
 
-    def load(self, section, key, dtype='json', default=None):
+    def _validate(self, data, min_ver):
+        version = try_get(data, lambda x: x[self._VERSION_KEY])
+        if not version:  # Backward compatibility
+            data, version = {'data': data}, self._DEFAULT_VERSION
+        if not is_outdated_version(version, min_ver or '0', assume_new=False):
+            return data['data']
+        self._ydl.to_screen(
+            'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
+
+    def load(self, section, key, dtype='json', default=None, min_ver=None):
         assert dtype in ('json',)
 
         if not self.enabled:
@@ -66,12 +84,12 @@ class Cache(object):
         try:
             try:
                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
-                    return json.load(cachef)
+                    return self._validate(json.load(cachef), min_ver)
             except ValueError:
                 try:
                     file_size = os.path.getsize(cache_fn)
                 except (OSError, IOError) as oe:
-                    file_size = str(oe)
+                    file_size = error_to_compat_str(oe)
                 self._ydl.report_warning(
                     'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
         except IOError:

From 7009bb9f3182449ae8cc05cc28b768b63030a485 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 2 Sep 2022 20:41:39 +0530
Subject: [PATCH 119/312] [jsinterp] Workaround operator associativity issue *
 temporary fix for player 5a3b6271 [1]

1. https://github.com/yt-dlp/yt-dlp/issues/4635#issuecomment-1235384480
---
 test/test_youtube_signature.py | 4 ++++
 youtube_dl/jsinterp.py         | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index ec914a871..4e678cae0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -131,6 +131,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
         '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
     ),
+    (
+        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
+        'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index d13329396..99dd98435 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -107,8 +107,8 @@ _OPERATORS = (
     ('+', _js_arith_op(operator.add)),
     ('-', _js_arith_op(operator.sub)),
     ('*', _js_arith_op(operator.mul)),
-    ('/', _js_div),
     ('%', _js_mod),
+    ('/', _js_div),
     ('**', _js_exp),
 )
 

From 9493ffdb8b690732e995422621bad3ed6c9041f5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 4 Oct 2022 00:42:15 +0100
Subject: [PATCH 120/312] [test] Use windows-2019 for tests (At least for now)
 resolves #31249

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 90bd63c32..a609f3704 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,12 +15,12 @@ jobs:
         run-tests-ext: [sh]
         include:
         # python 3.2 is only available on windows via setup-python
-        - os: windows-latest
+        - os: windows-2019
           python-version: 3.2
           python-impl: cpython
           ytdl-test-set: core
           run-tests-ext: bat
-        - os: windows-latest
+        - os: windows-2019
           python-version: 3.2
           python-impl: cpython
           ytdl-test-set: download

From d35557a75d943865e40410d51bfcc18276e98532 Mon Sep 17 00:00:00 2001
From: coletdjnz <coletdjnz@protonmail.com>
Date: Fri, 23 Sep 2022 12:10:35 +1200
Subject: [PATCH 121/312] [Telegraaf] Use mobile GraphQL API endpoint

Workaround for Cloudflare 403
Fixes https://github.com/yt-dlp/yt-dlp/issues/5000
Authored by: coletdjnz
---
 youtube_dl/extractor/telegraaf.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py
index 2dc020537..5174898f2 100644
--- a/youtube_dl/extractor/telegraaf.py
+++ b/youtube_dl/extractor/telegraaf.py
@@ -34,7 +34,9 @@ class TelegraafIE(InfoExtractor):
         article_id = self._match_id(url)
 
         video_id = self._download_json(
-            'https://www.telegraaf.nl/graphql', article_id, query={
+            'https://app.telegraaf.nl/graphql', article_id,
+            headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
+            query={
                 'query': '''{
   article(uid: %s) {
     videos {

From 22127b271c8f3e9266840bc5a2fb994d6248e369 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 10 Oct 2022 17:41:40 +0000
Subject: [PATCH 122/312] [NRK] Remove explicit Accept-Encoding header that
 invites Brotli

Fixes #31285
---
 youtube_dl/extractor/nrk.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 6d01a25c3..5a62b50fc 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -60,8 +60,7 @@ class NRKBaseIE(InfoExtractor):
         return self._download_json(
             urljoin('https://psapi.nrk.no/', path),
             video_id, note or 'Downloading %s JSON' % item,
-            fatal=fatal, query=query,
-            headers={'Accept-Encoding': 'gzip, deflate, br'})
+            fatal=fatal, query=query)
 
 
 class NRKIE(NRKBaseIE):

From 1b1442887e67b63545453e10816904e2b4c561c1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 10 Oct 2022 19:26:32 +0100
Subject: [PATCH 123/312] [manyvids] Improve extraction (#31172)

* extract all formats from page
* extract description, uploader, views, likes
* downrate previews
* fix tests
* use txt_or_none()
---
 youtube_dl/extractor/manyvids.py | 113 +++++++++++++++++++++++++------
 1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py
index e8d7163e4..6805102ba 100644
--- a/youtube_dl/extractor/manyvids.py
+++ b/youtube_dl/extractor/manyvids.py
@@ -1,11 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
+    extract_attributes,
     int_or_none,
     str_to_int,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor):
             'id': '133957',
             'ext': 'mp4',
             'title': 'everthing about me (Preview)',
+            'uploader': 'ellyxxix',
             'view_count': int,
             'like_count': int,
         },
     }, {
         # full video
         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
-        'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+        'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
         'info_dict': {
             'id': '935718',
             'ext': 'mp4',
             'title': 'MY FACE REVEAL',
+            'description': 'md5:ec5901d41808b3746fed90face161612',
+            'uploader': 'Sarah Calanthe',
             'view_count': int,
             'like_count': int,
         },
@@ -41,15 +49,43 @@ class ManyVidsIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        video_url = self._search_regex(
-            r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'video URL', group='url')
+        info = self._search_regex(
+            r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
+            webpage, 'meta details', default='')
+        info = extract_attributes(info)
 
-        title = self._html_search_regex(
-            (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
-             r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
-            webpage, 'title', default=None) or self._html_search_meta(
-            'twitter:title', webpage, 'title', fatal=True)
+        player = self._search_regex(
+            r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
+            webpage, 'player details', default='')
+        player = extract_attributes(player)
+
+        video_urls_and_ids = (
+            (info.get('data-meta-video'), 'video'),
+            (player.get('data-video-transcoded'), 'transcoded'),
+            (player.get('data-video-filepath'), 'filepath'),
+            (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
+        )
+
+        def txt_or_none(s, default=None):
+            return (s.strip() or default) if isinstance(s, compat_str) else default
+
+        uploader = txt_or_none(info.get('data-meta-author'))
+
+        def mung_title(s):
+            if uploader:
+                s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
+            return txt_or_none(s)
+
+        title = (
+            mung_title(info.get('data-meta-title'))
+            or self._html_search_regex(
+                (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+                 r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+                webpage, 'title', default=None)
+            or self._html_search_meta(
+                'twitter:title', webpage, 'title', fatal=True))
+
+        title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
 
         if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
             title += ' (Preview)'
@@ -70,23 +106,56 @@ class ManyVidsIE(InfoExtractor):
                     'X-Requested-With': 'XMLHttpRequest'
                 })
 
-        if determine_ext(video_url) == 'm3u8':
-            formats = self._extract_m3u8_formats(
-                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls')
-        else:
-            formats = [{'url': video_url}]
+        formats = []
+        for v_url, fmt in video_urls_and_ids:
+            v_url = url_or_none(v_url)
+            if not v_url:
+                continue
+            if determine_ext(v_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    v_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls'))
+            else:
+                formats.append({
+                    'url': v_url,
+                    'format_id': fmt,
+                })
 
-        like_count = int_or_none(self._search_regex(
-            r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
-        view_count = str_to_int(self._html_search_regex(
-            r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
-            'view count', default=None))
+        self._remove_duplicate_formats(formats)
+
+        for f in formats:
+            if f.get('height') is None:
+                f['height'] = int_or_none(
+                    self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
+            if '/preview/' in f['url']:
+                f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
+                f['preference'] = -10
+            if 'transcoded' in f['format_id']:
+                f['preference'] = f.get('preference', -1) - 1
+
+        self._sort_formats(formats)
+
+        def get_likes():
+            likes = self._search_regex(
+                r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
+                webpage, 'likes', default='')
+            likes = extract_attributes(likes)
+            return int_or_none(likes.get('data-likes'))
+
+        def get_views():
+            return str_to_int(self._html_search_regex(
+                r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
+                webpage, 'view count', default=None))
 
         return {
             'id': video_id,
             'title': title,
-            'view_count': view_count,
-            'like_count': like_count,
             'formats': formats,
+            'description': txt_or_none(info.get('data-meta-description')),
+            'uploader': txt_or_none(info.get('data-meta-author')),
+            'thumbnail': (
+                url_or_none(info.get('data-meta-image'))
+                or url_or_none(player.get('data-video-screenshot'))),
+            'view_count': get_views(),
+            'like_count': get_likes(),
         }

From 82e4eca711a128138ed0b84ddb4321e403d56340 Mon Sep 17 00:00:00 2001
From: Xiyue <113869642+xiyue077@users.noreply.github.com>
Date: Tue, 11 Oct 2022 09:52:48 +1100
Subject: [PATCH 124/312] [motherless] Fixed the broken uploader_id in the
 extractor (#31243)

* Fixed the broken uploader_id in the extractor.
* Make uploader_id RE looser
* Fix uploader_id in test Motherless_3
* Fix group pagination
* # coding: utf-8

Co-authored-by: Andy Xuming <xuminic@gmail.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/motherless.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index ef1e081f2..35d2b46ed 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import datetime
@@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
             'title': 'a/ Hot Teens',
             'categories': list,
             'upload_date': '20210104',
-            'uploader_id': 'yonbiw',
+            'uploader_id': 'anonymous',
             'thumbnail': r're:https?://.*\.jpg',
             'age_limit': 18,
         },
@@ -127,7 +128,7 @@ class MotherlessIE(InfoExtractor):
 
         comment_count = webpage.count('class="media-comment-contents"')
         uploader_id = self._html_search_regex(
-            r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+            r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''',
             webpage, 'uploader_id')
 
         categories = self._html_search_meta('keywords', webpage, default=None)
@@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor):
             'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
                            'any kind!'
         },
-        'playlist_mincount': 9,
+        'playlist_mincount': 0,
     }]
 
     @classmethod
@@ -208,9 +209,9 @@ class MotherlessGroupIE(InfoExtractor):
             r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
         description = self._html_search_meta(
             'description', webpage, fatal=False)
-        page_count = self._int(self._search_regex(
-            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
-            webpage, 'page_count'), 'page_count')
+        page_count = str_to_int(self._search_regex(
+            r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
+            webpage, 'page_count', default='1'))
         PAGE_SIZE = 80
 
         def _get_page(idx):

From 2ced5a79128f53faad94dc494d05925eb957c414 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 9 Aug 2022 19:34:34 +0100
Subject: [PATCH 125/312] [test] Implement string "lambda x: condition(x)" as
 an expected value

Semantics equivalent to `assert condition(got)`
---
 test/helper.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test/helper.py b/test/helper.py
index e62aab11e..c6a2f0667 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -128,6 +128,12 @@ def expect_value(self, got, expected, field):
         self.assertTrue(
             contains_str in got,
             'field %s (value: %r) should contain %r' % (field, got, contains_str))
+    elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
+        fn = eval(expected)
+        suite = expected.split(':', 1)[1].strip()
+        self.assertTrue(
+            fn(got),
+            'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
     elif isinstance(expected, type):
         self.assertTrue(
             isinstance(got, expected),
@@ -137,7 +143,7 @@ def expect_value(self, got, expected, field):
     elif isinstance(expected, list) and isinstance(got, list):
         self.assertEqual(
             len(expected), len(got),
-            'Expect a list of length %d, but got a list of length %d for field %s' % (
+            'Expected a list of length %d, but got a list of length %d for field %s' % (
                 len(expected), len(got), field))
         for index, (item_got, item_expected) in enumerate(zip(got, expected)):
             type_got = type(item_got)

From c282e5f8d723763ba88c521221e4535f46453949 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 9 Aug 2022 19:37:58 +0100
Subject: [PATCH 126/312] [ZDF] Overhaul ZDF extractors * pull some yt-dlp
 changes into ZDFBaseIE._extract_format() * add test cases from yt-dlp to
 ZDFIE * fix crash in ZDFIE._extract_mobile() when object had no `formitaeten`
 * improve title extraction in ZDFChannelIE (remove trailing station ident) *
 avoid extracting non-video playlist items (fixes #31149)

---
 youtube_dl/extractor/zdf.py | 169 +++++++++++++++++++++---------------
 1 file changed, 97 insertions(+), 72 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 3d39bb33a..fcc63ef52 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -8,13 +8,14 @@ from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
+    extract_attributes,
     float_or_none,
     int_or_none,
     merge_dicts,
     NO_DEFAULT,
-    orderedSet,
     parse_codecs,
     qualities,
+    str_or_none,
     try_get,
     unified_timestamp,
     update_url_query,
@@ -57,28 +58,39 @@ class ZDFBaseIE(InfoExtractor):
         format_urls.add(format_url)
         mime_type = meta.get('mimeType')
         ext = determine_ext(format_url)
+
+        join_nonempty = lambda s, l: s.join(filter(None, l))
+        meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t)
+
         if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
+            new_formats = self._extract_m3u8_formats(
                 format_url, video_id, 'mp4', m3u8_id='hls',
-                entry_protocol='m3u8_native', fatal=False))
+                entry_protocol='m3u8_native', fatal=False)
         elif mime_type == 'application/f4m+xml' or ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+            new_formats = self._extract_f4m_formats(
+                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
         else:
             f = parse_codecs(meta.get('mimeCodec'))
+            if not f:
+                data = meta.get('type', '').split('_')
+                if try_get(data, lambda x: x[2]) == ext:
+                    f = dict(zip(('vcodec', 'acodec'), data[1]))
+
             format_id = ['http']
-            for p in (meta.get('type'), meta.get('quality')):
-                if p and isinstance(p, compat_str):
-                    format_id.append(p)
+            format_id.extend(join_nonempty('-', meta_map(('type', 'quality'))))
             f.update({
                 'url': format_url,
                 'format_id': '-'.join(format_id),
-                'format_note': meta.get('quality'),
-                'language': meta.get('language'),
-                'quality': qualities(self._QUALITIES)(meta.get('quality')),
-                'preference': -10,
+                'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
             })
-            formats.append(f)
+            new_formats = [f]
+
+        formats.extend(merge_dicts(f, {
+            'format_note': join_nonempty(',', meta_map(('quality', 'class'))),
+            'language': meta.get('language'),
+            'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
+            'quality': qualities(self._QUALITIES)(meta.get('quality')),
+        }) for f in new_formats)
 
     def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
         ptmd = self._call_api(
@@ -107,6 +119,7 @@ class ZDFBaseIE(InfoExtractor):
                                 'type': f.get('type'),
                                 'mimeType': f.get('mimeType'),
                                 'quality': quality.get('quality'),
+                                'class': track.get('class'),
                                 'language': track.get('language'),
                             })
         self._sort_formats(formats)
@@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE):
             'duration': 2615,
             'timestamp': 1465021200,
             'upload_date': '20160604',
+            'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
+        },
+    }, {
+        'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
+        'md5': '1b93bdec7d02fc0b703c5e7687461628',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': 'video_funk_1770473',
+            'duration': 1278,
+            'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
+            'title': 'Alles ist verzaubert',
+            'timestamp': 1635520560,
+            'upload_date': '20211029',
+            'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
         },
     }, {
         # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
@@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
             'timestamp': 1641355200,
             'upload_date': '20220105',
         },
+        'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+    }, {
+        'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+        'info_dict': {
+            'id': '191205_1800_sendung_sok8',
+            'ext': 'mp4',
+            'title': 'Das Geld anderer Leute',
+            'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+            'duration': 2581.0,
+            'timestamp': 1654790700,
+            'upload_date': '20220609',
+            'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+        },
     }]
 
     def _extract_entry(self, url, player, content, video_id):
@@ -265,15 +305,16 @@ class ZDFIE(ZDFBaseIE):
             'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
             video_id)
 
-        document = video['document']
-
-        title = document['titel']
-        content_id = document['basename']
-
         formats = []
-        format_urls = set()
-        for f in document['formitaeten']:
-            self._extract_format(content_id, formats, format_urls, f)
+        formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+        document = formitaeten and video['document']
+        if formitaeten:
+            title = document['titel']
+            content_id = document['basename']
+
+            format_urls = set()
+            for f in formitaeten or []:
+                self._extract_format(content_id, formats, format_urls, f)
         self._sort_formats(formats)
 
         thumbnails = []
@@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE):
         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
         'info_dict': {
             'id': 'das-aktuelle-sportstudio',
-            'title': 'das aktuelle sportstudio | ZDF',
+            'title': 'das aktuelle sportstudio',
         },
-        'playlist_mincount': 23,
+        'playlist_mincount': 18,
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e',
         'info_dict': {
@@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE):
             'title': 'planet e.',
         },
         'playlist_mincount': 50,
+    }, {
+        'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+        'info_dict': {
+            'id': 'aktenzeichen-xy-ungeloest',
+            'title': 'Aktenzeichen XY... ungelöst',
+            'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+        },
+        'playlist_mincount': 2,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/',
         'only_matching': True,
@@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE):
     def suitable(cls, url):
         return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
 
+    def _og_search_title(self, webpage, fatal=False):
+        title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
+        return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
         webpage = self._download_webpage(url, channel_id)
 
-        entries = [
-            self.url_result(item_url, ie=ZDFIE.ie_key())
-            for item_url in orderedSet(re.findall(
-                r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
+        matches = re.finditer(
+            r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
+            webpage)
 
-        return self.playlist_result(
-            entries, channel_id, self._og_search_title(webpage, fatal=False))
+        if self._downloader.params.get('noplaylist', False):
+            entry = next(
+                (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
+                None)
+            self.to_screen('Downloading just the main video because of --no-playlist')
+            if entry:
+                return entry
+        else:
+            self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
 
-        r"""
-        player = self._extract_player(webpage, channel_id)
+        def check_video(m):
+            v_ref = self._search_regex(
+                r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
+                webpage, 'check id', default='')
+            v_ref = extract_attributes(v_ref)
+            return v_ref.get('data-target-video-type') != 'novideo'
 
-        channel_id = self._search_regex(
-            r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
-            'channel id', group='id')
-
-        channel = self._call_api(
-            'https://api.zdf.de/content/documents/%s.json' % channel_id,
-            player, url, channel_id)
-
-        items = []
-        for module in channel['module']:
-            for teaser in try_get(module, lambda x: x['teaser'], list) or []:
-                t = try_get(
-                    teaser, lambda x: x['http://zdf.de/rels/target'], dict)
-                if not t:
-                    continue
-                items.extend(try_get(
-                    t,
-                    lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                    list) or [])
-            items.extend(try_get(
-                module,
-                lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                list) or [])
-
-        entries = []
-        entry_urls = set()
-        for item in items:
-            t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
-            if not t:
-                continue
-            sharing_url = t.get('http://zdf.de/rels/sharing-url')
-            if not sharing_url or not isinstance(sharing_url, compat_str):
-                continue
-            if sharing_url in entry_urls:
-                continue
-            entry_urls.add(sharing_url)
-            entries.append(self.url_result(
-                sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
-        return self.playlist_result(entries, channel_id, channel.get('title'))
-        """
+        return self.playlist_from_matches(
+            (m.group('url') for m in matches if check_video(m)),
+            channel_id, self._og_search_title(webpage, fatal=False))

From 6e2626f092c63a5fa22a31df409610b5deaf3968 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Oct 2022 05:58:10 +0100
Subject: [PATCH 127/312] [JSInterp] Improve separation logic

Based on https://github.com/yt-dlp/yt-dlp/commit/0468a3b3253957bfbeb98b4a7c71542ff80e9e06
---
 youtube_dl/jsinterp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 99dd98435..530a705b4 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -214,7 +214,7 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
+                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     @classmethod
@@ -268,7 +268,7 @@ class JSInterpreter(object):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -301,7 +301,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod

From c94a459a248352fd97dccc79ed6604a558459bfd Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Oct 2022 12:18:12 +0000
Subject: [PATCH 128/312] [utils] Sanitize look-alike Unicode glyphs in non-ID
 filename fields when --restrict-filenames

Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test.
---
 youtube_dl/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index fea38ed32..23a65a81c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -33,6 +33,7 @@ import sys
 import tempfile
 import time
 import traceback
+import unicodedata
 import xml.etree.ElementTree
 import zlib
 
@@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
             return '_'
         return char
 
+    # Replace look-alike Unicode glyphs
+    if restricted and not is_id:
+        s = unicodedata.normalize('NFKC', s)
     # Handle timestamps
     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
     result = ''.join(map(replace_insane, s))

From 11b284c81fe2988813c817918536fc3a5630870a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Oct 2022 12:36:44 +0000
Subject: [PATCH 129/312] [Common:JWPlayer] Fix x1000 scaling error

See https://github.com/yt-dlp/yt-dlp/issues/5106#issuecomment-1264625161
---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 797c35fd5..1f33a1e06 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2844,7 +2844,7 @@ class InfoExtractor(object):
                     'url': source_url,
                     'width': int_or_none(source.get('width')),
                     'height': height,
-                    'tbr': int_or_none(source.get('bitrate')),
+                    'tbr': int_or_none(source.get('bitrate'), scale=1000),
                     'ext': ext,
                 }
                 if source_url.startswith('rtmp'):

From c91cbf60729af93c4677864aa6c8b74b576146ca Mon Sep 17 00:00:00 2001
From: Xie Yanbo <xieyanbo@gmail.com>
Date: Tue, 11 Oct 2022 20:55:09 +0800
Subject: [PATCH 130/312] [netease] Get netease music download url through
 player api (#31235)

* remove unplayable song from test
* compatible with python 2
* using standard User_Agent, fix imports
* use hash instead of long description
* fix lint
* fix hash
---
 test/test_aes.py                     |   9 +-
 youtube_dl/aes.py                    |  37 +++++++-
 youtube_dl/extractor/neteasemusic.py | 121 +++++++++++++++++++--------
 3 files changed, 128 insertions(+), 39 deletions(-)

diff --git a/test/test_aes.py b/test/test_aes.py
index cc89fb6ab..0f181466b 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -8,7 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
 from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
 import base64
 
@@ -58,6 +58,13 @@ class TestAES(unittest.TestCase):
         decrypted = (aes_decrypt_text(encrypted, password, 32))
         self.assertEqual(decrypted, self.secret_msg)
 
+    def test_ecb_encrypt(self):
+        data = bytes_to_intlist(self.secret_msg)
+        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
+        self.assertEqual(
+            encrypted,
+            b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index d0de2d93f..a94a41079 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
 BLOCK_SIZE_BYTES = 16
 
 
+def pkcs7_padding(data):
+    """
+    PKCS#7 padding
+
+    @param {int[]} data        cleartext
+    @returns {int[]}           padding data
+    """
+
+    remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
+    return data + [remaining_length] * remaining_length
+
+
 def aes_ctr_decrypt(data, key, counter):
     """
     Decrypt with aes in counter mode
@@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
     previous_cipher_block = iv
     for i in range(block_count):
         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
-        remaining_length = BLOCK_SIZE_BYTES - len(block)
-        block += [remaining_length] * remaining_length
+        block = pkcs7_padding(block)
         mixed_block = xor(block, previous_cipher_block)
 
         encrypted_block = aes_encrypt(mixed_block, expanded_key)
@@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
     return encrypted_data
 
 
+def aes_ecb_encrypt(data, key):
+    """
+    Encrypt with aes in ECB mode. Using PKCS#7 padding
+
+    @param {int[]} data        cleartext
+    @param {int[]} key         16/24/32-Byte cipher key
+    @returns {int[]}           encrypted data
+    """
+    expanded_key = key_expansion(key)
+    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+    encrypted_data = []
+    for i in range(block_count):
+        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+        block = pkcs7_padding(block)
+
+        encrypted_block = aes_encrypt(block, expanded_key)
+        encrypted_data += encrypted_block
+
+    return encrypted_data
+
+
 def key_expansion(data):
     """
     Generate key schedule
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
index 978a05841..fad22a2cd 100644
--- a/youtube_dl/extractor/neteasemusic.py
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -1,20 +1,31 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from hashlib import md5
 from base64 import b64encode
+from binascii import hexlify
 from datetime import datetime
+from hashlib import md5
+from random import randint
+import json
 import re
+import time
 
 from .common import InfoExtractor
+from ..aes import aes_ecb_encrypt, pkcs7_padding
 from ..compat import (
     compat_urllib_parse_urlencode,
     compat_str,
     compat_itertools_count,
 )
 from ..utils import (
-    sanitized_Request,
+    ExtractorError,
+    bytes_to_intlist,
     float_or_none,
+    int_or_none,
+    intlist_to_bytes,
+    sanitized_Request,
+    std_headers,
+    try_get,
 )
 
 
@@ -35,32 +46,85 @@ class NetEaseMusicBaseIE(InfoExtractor):
         result = b64encode(m.digest()).decode('ascii')
         return result.replace('/', '_').replace('+', '-')
 
+    @classmethod
+    def make_player_api_request_data_and_headers(cls, song_id, bitrate):
+        KEY = b'e82ckenh8dichen8'
+        URL = '/api/song/enhance/player/url'
+        now = int(time.time() * 1000)
+        rand = randint(0, 1000)
+        cookie = {
+            'osver': None,
+            'deviceId': None,
+            'appver': '8.0.0',
+            'versioncode': '140',
+            'mobilename': None,
+            'buildver': '1623435496',
+            'resolution': '1920x1080',
+            '__csrf': '',
+            'os': 'pc',
+            'channel': None,
+            'requestId': '{0}_{1:04}'.format(now, rand),
+        }
+        request_text = json.dumps(
+            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
+            separators=(',', ':'))
+        message = 'nobody{0}use{1}md5forencrypt'.format(
+            URL, request_text).encode('latin1')
+        msg_digest = md5(message).hexdigest()
+
+        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
+            URL, request_text, msg_digest)
+        data = pkcs7_padding(bytes_to_intlist(data))
+        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
+        encrypted_params = hexlify(encrypted).decode('ascii').upper()
+
+        cookie = '; '.join(
+            ['{0}={1}'.format(k, v if v is not None else 'undefined')
+             for [k, v] in cookie.items()])
+
+        headers = {
+            'User-Agent': std_headers['User-Agent'],
+            'Content-Type': 'application/x-www-form-urlencoded',
+            'Referer': 'https://music.163.com',
+            'Cookie': cookie,
+        }
+        return ('params={0}'.format(encrypted_params), headers)
+
+    def _call_player_api(self, song_id, bitrate):
+        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
+        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
+        try:
+            return self._download_json(
+                url, song_id, data=data.encode('ascii'), headers=headers)
+        except ExtractorError as e:
+            if type(e.cause) in (ValueError, TypeError):
+                # JSON load failure
+                raise
+        except Exception:
+            pass
+        return {}
+
     def extract_formats(self, info):
         formats = []
+        song_id = info['id']
         for song_format in self._FORMATS:
             details = info.get(song_format)
             if not details:
                 continue
-            song_file_path = '/%s/%s.%s' % (
-                self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
 
-            # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
-            # from NetEase's CDN provider that can be used if m5.music.126.net does not
-            # work, especially for users outside of Mainland China
-            # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
-            for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
-                         'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
-                song_url = host + song_file_path
+            bitrate = int_or_none(details.get('bitrate')) or 999000
+            data = self._call_player_api(song_id, bitrate)
+            for song in try_get(data, lambda x: x['data'], list) or []:
+                song_url = try_get(song, lambda x: x['url'])
                 if self._is_valid_url(song_url, info['id'], 'song'):
                     formats.append({
                         'url': song_url,
                         'ext': details.get('extension'),
-                        'abr': float_or_none(details.get('bitrate'), scale=1000),
+                        'abr': float_or_none(song.get('br'), scale=1000),
                         'format_id': song_format,
-                        'filesize': details.get('size'),
-                        'asr': details.get('sr')
+                        'filesize': int_or_none(song.get('size')),
+                        'asr': int_or_none(details.get('sr')),
                     })
-                    break
         return formats
 
     @classmethod
@@ -79,30 +143,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
     _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://music.163.com/#/song?id=32102397',
-        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
+        'md5': '3e909614ce09b1ccef4a3eb205441190',
         'info_dict': {
             'id': '32102397',
             'ext': 'mp3',
-            'title': 'Bad Blood (feat. Kendrick Lamar)',
+            'title': 'Bad Blood',
             'creator': 'Taylor Swift / Kendrick Lamar',
-            'upload_date': '20150517',
-            'timestamp': 1431878400,
-            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
+            'upload_date': '20150516',
+            'timestamp': 1431792000,
+            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
         },
-        'skip': 'Blocked outside Mainland China',
-    }, {
-        'note': 'No lyrics translation.',
-        'url': 'http://music.163.com/#/song?id=29822014',
-        'info_dict': {
-            'id': '29822014',
-            'ext': 'mp3',
-            'title': '听见下雨的声音',
-            'creator': '周杰伦',
-            'upload_date': '20141225',
-            'timestamp': 1419523200,
-            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
-        },
-        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'No lyrics.',
         'url': 'http://music.163.com/song?id=17241424',
@@ -112,9 +162,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'title': 'Opus 28',
             'creator': 'Dustin O\'Halloran',
             'upload_date': '20080211',
+            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
             'timestamp': 1202745600,
         },
-        'skip': 'Blocked outside Mainland China',
     }, {
         'note': 'Has translated name.',
         'url': 'http://music.163.com/#/song?id=22735043',
@@ -128,7 +178,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1264608000,
             'alt_title': '说出愿望吧(Genie)',
         },
-        'skip': 'Blocked outside Mainland China',
     }]
 
     def _process_lyrics(self, lyrics_info):

From 7bbd5b13d4c6cfc3e24f56413ff1a1eace8472b8 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 12 Oct 2022 01:09:55 +0100
Subject: [PATCH 131/312] [Motherless] Pull from yt-dlp, etc

* use username field
* loosen regexes
* warn on page count 0 in group
* avoid reloading group page 1
Closes #29626
---
 youtube_dl/extractor/motherless.py | 33 +++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 35d2b46ed..d352cb180 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -126,9 +126,10 @@ class MotherlessIE(InfoExtractor):
                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
 
-        comment_count = webpage.count('class="media-comment-contents"')
+        comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
         uploader_id = self._html_search_regex(
-            r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''',
+            (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
+             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
             webpage, 'uploader_id')
 
         categories = self._html_search_meta('keywords', webpage, default=None)
@@ -171,6 +172,17 @@ class MotherlessGroupIE(InfoExtractor):
                            'any kind!'
         },
         'playlist_mincount': 0,
+        'expected_warnings': [
+            'This group has no videos.',
+        ]
+    }, {
+        'url': 'https://motherless.com/g/beautiful_cock',
+        'info_dict': {
+            'id': 'beautiful_cock',
+            'title': 'Beautiful Cock',
+            'description': 'Group for lovely cocks yours, mine, a friends anything human',
+        },
+        'playlist_mincount': 2500,
     }]
 
     @classmethod
@@ -211,14 +223,21 @@ class MotherlessGroupIE(InfoExtractor):
             'description', webpage, fatal=False)
         page_count = str_to_int(self._search_regex(
             r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
-            webpage, 'page_count', default='1'))
+            webpage, 'page_count', default=0))
+        if not page_count:
+            message = self._search_regex(
+                r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
+                webpage, 'error_msg', default=None) or 'This group has no videos.'
+            self.report_warning(message, group_id)
+            page_count = 1
         PAGE_SIZE = 80
 
         def _get_page(idx):
-            webpage = self._download_webpage(
-                page_url, group_id, query={'page': idx + 1},
-                note='Downloading page %d/%d' % (idx + 1, page_count)
-            )
+            if idx > 0:
+                webpage = self._download_webpage(
+                    page_url, group_id, query={'page': idx + 1},
+                    note='Downloading page %d/%d' % (idx + 1, page_count)
+                )
             for entry in self._extract_entries(webpage, url):
                 yield entry
 

From 7135277fec497bd7649c31087aba52daa7897484 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Oct 2022 01:59:01 +0000
Subject: [PATCH 132/312] [ManyVids] Support new single-page app structure

See https://github.com/yt-dlp/yt-dlp/issues/5210#issuecomment-1276919962.
---
 youtube_dl/extractor/manyvids.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py
index 6805102ba..608a02a8d 100644
--- a/youtube_dl/extractor/manyvids.py
+++ b/youtube_dl/extractor/manyvids.py
@@ -47,7 +47,12 @@ class ManyVidsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
+        real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
+        try:
+            webpage = self._download_webpage(real_url, video_id)
+        except:
+            # probably useless fallback
+            webpage = self._download_webpage(url, video_id)
 
         info = self._search_regex(
             r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
@@ -98,7 +103,8 @@ class ManyVidsIE(InfoExtractor):
             # Sets some cookies
             self._download_webpage(
                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
-                video_id, fatal=False, data=urlencode_postdata({
+                video_id, note='Setting format cookies', fatal=False,
+                data=urlencode_postdata({
                     'mvtoken': mv_token,
                     'vid': video_id,
                 }), headers={

From ee8560d01eec511587f8207c3d84219ec620a9a6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Oct 2022 02:42:49 +0000
Subject: [PATCH 133/312] [ManyVids] Support new single-page app structure

---
 youtube_dl/extractor/manyvids.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py
index 608a02a8d..75978cfd6 100644
--- a/youtube_dl/extractor/manyvids.py
+++ b/youtube_dl/extractor/manyvids.py
@@ -50,7 +50,7 @@ class ManyVidsIE(InfoExtractor):
         real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
         try:
             webpage = self._download_webpage(real_url, video_id)
-        except:
+        except Exception:
             # probably useless fallback
             webpage = self._download_webpage(url, video_id)
 

From 447edc48e63f5f21797ea0d9ee84e37ed1547035 Mon Sep 17 00:00:00 2001
From: ache <ache@ache.one>
Date: Tue, 18 Oct 2022 15:06:27 +0000
Subject: [PATCH 134/312] Fix ADN extractor (#31275)

* Rename Anime Digital Network to Animation Digital Network, animationdigitalnetwork.fr
* Update the test to an available video
* Update the decoding key of subtitles
* Keep the support of old URLs
* Add a test to match the old URL
* Reduce redundancy of the URL name
* Fix md5 ^^"
* Fix undefined _BASE
* Process HTTP error text (eg geo-block) correctly and uniformly in Py3, Py2
* Skip test for CI since geo-blocked

Signed-off-by: ache <ache@ache.one>
Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/adn.py | 57 +++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py
index a55ebbcbd..5ff419f19 100644
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dl/extractor/adn.py
@@ -31,30 +31,34 @@ from ..utils import (
 
 
 class ADNIE(InfoExtractor):
-    IE_DESC = 'Anime Digital Network'
-    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
-        'md5': '0319c99885ff5547565cacb4f3f9348d',
+    IE_DESC = 'Animation Digital Network'
+    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+        'md5': '1c9ef066ceb302c86f80c2b371615261',
         'info_dict': {
-            'id': '7778',
+            'id': '9841',
             'ext': 'mp4',
-            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
-            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
-            'series': 'Blue Exorcist - Kyôto Saga',
-            'duration': 1467,
-            'release_date': '20170106',
+            'title': 'Fruits Basket - Episode 1',
+            'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+            'series': 'Fruits Basket',
+            'duration': 1437,
+            'release_date': '20190405',
             'comment_count': int,
             'average_rating': float,
-            'season_number': 2,
-            'episode': 'Début des hostilités',
+            'season_number': 1,
+            'episode': 'À ce soir !',
             'episode_number': 1,
-        }
-    }
+        },
+        'skip': 'Only available in region (FR, ...)',
+    }, {
+        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+        'only_matching': True,
+    }]
 
-    _NETRC_MACHINE = 'animedigitalnetwork'
-    _BASE_URL = 'http://animedigitalnetwork.fr'
-    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+    _NETRC_MACHINE = 'animationdigitalnetwork'
+    _BASE = 'animationdigitalnetwork.fr'
+    _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
     _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
     _HEADERS = {}
     _LOGIN_ERR_MESSAGE = 'Unable to log in'
@@ -82,14 +86,14 @@ class ADNIE(InfoExtractor):
         if subtitle_location:
             enc_subtitles = self._download_webpage(
                 subtitle_location, video_id, 'Downloading subtitles data',
-                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+                fatal=False, headers={'Origin': 'https://' + self._BASE})
         if not enc_subtitles:
             return None
 
-        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+        # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
+            bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         subtitles_json = self._parse_json(
@@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
         if not username:
             return
         try:
+            url = self._API_BASE_URL + 'authentication/login'
             access_token = (self._download_json(
-                self._API_BASE_URL + 'authentication/login', None,
-                'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+                url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
                 data=urlencode_postdata({
                     'password': password,
                     'rememberMe': False,
@@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
             message = None
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                 resp = self._parse_json(
-                    e.cause.read().decode(), None, fatal=False) or {}
+                    self._webpage_read_content(e.cause, url, username),
+                    username, fatal=False) or {}
                 message = resp.get('message') or resp.get('code')
             self.report_warning(message or self._LOGIN_ERR_MESSAGE)
 
@@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                     # This usually goes away with a different random pkcs1pad, so retry
                     continue
 
-                error = self._parse_json(e.cause.read(), video_id)
+                error = self._parse_json(
+                    self._webpage_read_content(e.cause, links_url, video_id),
+                    video_id, fatal=False) or {}
                 message = error.get('message')
                 if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                     self.raise_geo_restricted(msg=message)

From 0faa45d6c08f518b73d20e341944ea7292f9f2b2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Oct 2022 11:06:44 +0000
Subject: [PATCH 135/312] [BongaCams] Support new .net domain

Resolves #31262.
---
 youtube_dl/extractor/bongacams.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bongacams.py b/youtube_dl/extractor/bongacams.py
index 180542fbc..016999d55 100644
--- a/youtube_dl/extractor/bongacams.py
+++ b/youtube_dl/extractor/bongacams.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -12,13 +13,28 @@ from ..utils import (
 
 
 class BongaCamsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
+    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
     _TESTS = [{
         'url': 'https://de.bongacams.com/azumi-8',
         'only_matching': True,
     }, {
         'url': 'https://cn.bongacams.com/azumi-8',
         'only_matching': True,
+    }, {
+        'url': 'https://de.bongacams.net/claireashton',
+        'info_dict': {
+            'id': 'claireashton',
+            'ext': 'mp4',
+            'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+            'age_limit': 18,
+            'uploader_id': 'ClaireAshton',
+            'uploader': 'ClaireAshton',
+            'like_count': int,
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):

From 502cefa41f1d24057b6158748b2072dc911af682 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 27 Oct 2022 14:33:00 +0000
Subject: [PATCH 136/312] [Vimeo] Update variable name in hydration JSON
 pattern

Fixes #31311
---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index a66912502..853b38402 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
         if '//player.vimeo.com/video/' in url:
             config = self._parse_json(self._search_regex(
-                r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+                r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
             if config.get('view') == 4:
                 config = self._verify_player_video_password(
                     redirect_url, video_id, headers)

From d25cf62086443d86a633b8176b5c7e79f4cc569e Mon Sep 17 00:00:00 2001
From: Xie Yanbo <xieyanbo@gmail.com>
Date: Sun, 30 Oct 2022 19:46:46 +0800
Subject: [PATCH 137/312] [netease] Impove error handling (#31303)

* add warnings for users outside of China
* skip empty song urls

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/neteasemusic.py | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
index fad22a2cd..2bbfc7858 100644
--- a/youtube_dl/extractor/neteasemusic.py
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -20,6 +20,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
+    error_to_compat_str,
     float_or_none,
     int_or_none,
     intlist_to_bytes,
@@ -94,17 +95,23 @@ class NetEaseMusicBaseIE(InfoExtractor):
         url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
         data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
         try:
-            return self._download_json(
+            msg = 'empty result'
+            result = self._download_json(
                 url, song_id, data=data.encode('ascii'), headers=headers)
+            if result:
+                return result
         except ExtractorError as e:
             if type(e.cause) in (ValueError, TypeError):
                 # JSON load failure
                 raise
-        except Exception:
-            pass
+        except Exception as e:
+            msg = error_to_compat_str(e)
+            self.report_warning('%s API call (%s) failed: %s' % (
+                song_id, bitrate, msg))
         return {}
 
     def extract_formats(self, info):
+        err = 0
         formats = []
         song_id = info['id']
         for song_format in self._FORMATS:
@@ -116,6 +123,8 @@ class NetEaseMusicBaseIE(InfoExtractor):
             data = self._call_player_api(song_id, bitrate)
             for song in try_get(data, lambda x: x['data'], list) or []:
                 song_url = try_get(song, lambda x: x['url'])
+                if not song_url:
+                    continue
                 if self._is_valid_url(song_url, info['id'], 'song'):
                     formats.append({
                         'url': song_url,
@@ -125,6 +134,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
                         'filesize': int_or_none(song.get('size')),
                         'asr': int_or_none(details.get('sr')),
                     })
+                elif err == 0:
+                    err = try_get(song, lambda x: x['code'], int)
+
+        if not formats:
+            msg = 'No media links found'
+            if err != 0 and (err < 200 or err >= 400):
+                raise ExtractorError(
+                    '%s (site code %d)' % (msg, err, ), expected=True)
+            else:
+                self.raise_geo_restricted(
+                    msg + ': probably this video is not available from your location due to geo restriction.',
+                    countries=['CN'])
+
         return formats
 
     @classmethod

From ce5d36486ea95b8961c639d118bad262c8d7a067 Mon Sep 17 00:00:00 2001
From: Xie Yanbo <xieyanbo@gmail.com>
Date: Sun, 30 Oct 2022 19:48:44 +0800
Subject: [PATCH 138/312] [netease] Support urls shared from mobile app
 (#31304)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/neteasemusic.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py
index 2bbfc7858..5e5c6271b 100644
--- a/youtube_dl/extractor/neteasemusic.py
+++ b/youtube_dl/extractor/neteasemusic.py
@@ -162,7 +162,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
 class NetEaseMusicIE(NetEaseMusicBaseIE):
     IE_NAME = 'netease:song'
     IE_DESC = '网易云音乐'
-    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://music.163.com/#/song?id=32102397',
         'md5': '3e909614ce09b1ccef4a3eb205441190',
@@ -200,6 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             'timestamp': 1264608000,
             'alt_title': '说出愿望吧(Genie)',
         },
+    }, {
+        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
+        'md5': '95826c73ea50b1c288b22180ec9e754d',
+        'info_dict': {
+            'id': '95670',
+            'ext': 'mp3',
+            'title': '国际歌',
+            'creator': '马备',
+            'upload_date': '19911130',
+            'timestamp': 691516800,
+            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
+        },
     }]
 
     def _process_lyrics(self, lyrics_info):

From a19855f0f50fe7a6eb05a1d8fee554897e4dbdda Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Oct 2022 21:18:36 +0000
Subject: [PATCH 139/312] [compat] Add Python 2 Unicode casefold using a
 trivial wrapper around icu/CaseFolding.txt

---
 youtube_dl/casefold.py | 1643 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1643 insertions(+)
 create mode 100644 youtube_dl/casefold.py

diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
new file mode 100644
index 000000000..546269a3c
--- /dev/null
+++ b/youtube_dl/casefold.py
@@ -0,0 +1,1643 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .compat import compat_str
+
+# CaseFolding-15.0.0.txt
+# Date: 2022-02-02, 23:35:35 GMT
+# © 2022 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# All code points not listed in this file map to themselves.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, including how to have case folding
+# preserve normalization formats, see Section 3.13 Default Case Algorithms in
+# The Unicode Standard.
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+#    - For non-Turkic languages, this mapping is normally not used.
+#    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+#      Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
+#      See the discussions of case mapping in the Unicode Standard for more information.
+#
+# Usage:
+#  A. To do a simple case folding, use the mappings with status C + S.
+#  B. To do a full case folding, use the mappings with status C + F.
+#
+#    The mappings with status T can be used or omitted depending on the desired case-folding
+#    behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+# Property: Case_Folding
+
+#  All code points not explicitly listed for Case_Folding
+#  have the value C for the status field, and the code point itself for the mapping field.
+
+# =================================================================
+_map_str = '''
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
+023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
+023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
+023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
+0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
+0244; C; 0289; # LATIN CAPITAL LETTER U BAR
+0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
+0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
+0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
+024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
+024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0370; C; 0371; # GREEK CAPITAL LETTER HETA
+0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
+0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+037F; C; 03F3; # GREEK CAPITAL LETTER YOT
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
+03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
+03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
+03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
+0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
+0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
+0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
+0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
+051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
+051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
+051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
+0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
+052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
+052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
+052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
+10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
+10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
+10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
+10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
+10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
+10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
+10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
+10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
+10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
+10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
+10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
+10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
+10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
+10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
+10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
+10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
+10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
+10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
+10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
+10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
+10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
+10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
+10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
+10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
+10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
+10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
+10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
+10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
+10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
+10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
+10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
+10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
+10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
+10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
+10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
+10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
+10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
+10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
+13F8; C; 13F0; # CHEROKEE SMALL LETTER YE
+13F9; C; 13F1; # CHEROKEE SMALL LETTER YI
+13FA; C; 13F2; # CHEROKEE SMALL LETTER YO
+13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
+13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
+13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
+1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
+1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
+1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
+1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
+1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
+1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
+1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
+1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
+1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
+1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
+1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
+1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
+1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON
+1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN
+1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN
+1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN
+1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN
+1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN
+1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN
+1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS
+1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN
+1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR
+1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON
+1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR
+1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR
+1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE
+1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN
+1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR
+1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN
+1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR
+1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR
+1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN
+1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR
+1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN
+1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN
+1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN
+1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL
+1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL
+1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR
+1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN
+1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN
+1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE
+1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE
+1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE
+1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE
+1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR
+1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE
+1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI
+1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN
+1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI
+1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN
+1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN
+1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN
+1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
+1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
+1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2132; C; 214E; # TURNED CAPITAL F
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
+2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
+2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
+2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
+2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
+2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
+2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
+2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
+2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
+2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
+2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
+2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
+2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
+2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
+2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
+2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
+2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
+2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
+2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
+2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
+2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
+2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
+2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
+2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
+2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
+2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
+2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
+2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
+2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
+2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
+2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
+2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
+2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
+2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
+2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
+2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
+2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
+2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
+2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
+2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
+2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
+2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
+2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
+2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
+2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
+2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
+2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
+2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
+2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
+2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
+2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
+2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
+2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
+2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
+2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
+2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
+2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
+2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
+2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
+2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
+2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
+2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
+2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
+2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
+2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
+2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
+2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
+2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
+2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
+2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
+2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
+2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
+2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
+2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
+2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
+2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
+2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
+2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
+2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
+A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
+A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
+A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
+A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
+A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
+A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
+A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
+A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
+A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
+A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
+A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
+A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
+A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
+A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
+A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
+A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
+A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
+A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
+A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
+A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
+A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
+A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
+A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
+A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
+A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
+A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
+A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
+A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
+A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
+A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A726; C; A727; # LATIN CAPITAL LETTER HENG
+A728; C; A729; # LATIN CAPITAL LETTER TZ
+A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
+A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
+A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A732; C; A733; # LATIN CAPITAL LETTER AA
+A734; C; A735; # LATIN CAPITAL LETTER AO
+A736; C; A737; # LATIN CAPITAL LETTER AU
+A738; C; A739; # LATIN CAPITAL LETTER AV
+A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73C; C; A73D; # LATIN CAPITAL LETTER AY
+A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
+A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
+A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
+A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
+A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
+A74E; C; A74F; # LATIN CAPITAL LETTER OO
+A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
+A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
+A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
+A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A760; C; A761; # LATIN CAPITAL LETTER VY
+A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
+A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
+A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768; C; A769; # LATIN CAPITAL LETTER VEND
+A76A; C; A76B; # LATIN CAPITAL LETTER ET
+A76C; C; A76D; # LATIN CAPITAL LETTER IS
+A76E; C; A76F; # LATIN CAPITAL LETTER CON
+A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
+A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
+A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
+A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
+A780; C; A781; # LATIN CAPITAL LETTER TURNED L
+A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
+A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
+A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
+A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
+A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
+A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
+A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
+A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
+A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
+A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
+A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
+A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
+A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
+A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
+A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
+A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
+A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
+A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
+A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
+A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
+A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
+A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
+A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
+A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
+A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
+A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
+A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
+A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
+A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
+A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
+A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
+A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
+A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
+A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
+A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
+A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
+AB70; C; 13A0; # CHEROKEE SMALL LETTER A
+AB71; C; 13A1; # CHEROKEE SMALL LETTER E
+AB72; C; 13A2; # CHEROKEE SMALL LETTER I
+AB73; C; 13A3; # CHEROKEE SMALL LETTER O
+AB74; C; 13A4; # CHEROKEE SMALL LETTER U
+AB75; C; 13A5; # CHEROKEE SMALL LETTER V
+AB76; C; 13A6; # CHEROKEE SMALL LETTER GA
+AB77; C; 13A7; # CHEROKEE SMALL LETTER KA
+AB78; C; 13A8; # CHEROKEE SMALL LETTER GE
+AB79; C; 13A9; # CHEROKEE SMALL LETTER GI
+AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO
+AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU
+AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV
+AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA
+AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE
+AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI
+AB80; C; 13B0; # CHEROKEE SMALL LETTER HO
+AB81; C; 13B1; # CHEROKEE SMALL LETTER HU
+AB82; C; 13B2; # CHEROKEE SMALL LETTER HV
+AB83; C; 13B3; # CHEROKEE SMALL LETTER LA
+AB84; C; 13B4; # CHEROKEE SMALL LETTER LE
+AB85; C; 13B5; # CHEROKEE SMALL LETTER LI
+AB86; C; 13B6; # CHEROKEE SMALL LETTER LO
+AB87; C; 13B7; # CHEROKEE SMALL LETTER LU
+AB88; C; 13B8; # CHEROKEE SMALL LETTER LV
+AB89; C; 13B9; # CHEROKEE SMALL LETTER MA
+AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME
+AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI
+AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO
+AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU
+AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA
+AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA
+AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH
+AB91; C; 13C1; # CHEROKEE SMALL LETTER NE
+AB92; C; 13C2; # CHEROKEE SMALL LETTER NI
+AB93; C; 13C3; # CHEROKEE SMALL LETTER NO
+AB94; C; 13C4; # CHEROKEE SMALL LETTER NU
+AB95; C; 13C5; # CHEROKEE SMALL LETTER NV
+AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA
+AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE
+AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI
+AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO
+AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU
+AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV
+AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA
+AB9D; C; 13CD; # CHEROKEE SMALL LETTER S
+AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE
+AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI
+ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO
+ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU
+ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV
+ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA
+ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA
+ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE
+ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE
+ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI
+ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI
+ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO
+ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU
+ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV
+ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA
+ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA
+ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE
+ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI
+ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO
+ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU
+ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV
+ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA
+ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE
+ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI
+ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO
+ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU
+ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV
+ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA
+ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE
+ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI
+ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO
+ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU
+ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV
+ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
+10426; C; 1044E; # DESERET CAPITAL LETTER OI
+10427; C; 1044F; # DESERET CAPITAL LETTER EW
+104B0; C; 104D8; # OSAGE CAPITAL LETTER A
+104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
+104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
+104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
+104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
+104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
+104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
+104B7; C; 104DF; # OSAGE CAPITAL LETTER E
+104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
+104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
+104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
+104BB; C; 104E3; # OSAGE CAPITAL LETTER I
+104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
+104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
+104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
+104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
+104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
+104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
+104C2; C; 104EA; # OSAGE CAPITAL LETTER O
+104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
+104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
+104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
+104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
+104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
+104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
+104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
+104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
+104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
+104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
+104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
+104CE; C; 104F6; # OSAGE CAPITAL LETTER U
+104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
+104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
+104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
+104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
+104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
+10570; C; 10597; # VITHKUQI CAPITAL LETTER A
+10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
+10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
+10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
+10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
+10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
+10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
+10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
+10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
+10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
+1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
+1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
+1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
+1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
+1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
+10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
+10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
+10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
+10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
+10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
+10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
+10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
+10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
+10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
+10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
+1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
+1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
+1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
+1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
+1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
+10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
+10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
+10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
+10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
+10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
+10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
+10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
+10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
+10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB
+10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC
+10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC
+10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS
+10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED
+10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND
+10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E
+10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E
+10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE
+10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF
+10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG
+10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY
+10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH
+10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I
+10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II
+10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ
+10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK
+10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK
+10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK
+10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL
+10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY
+10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM
+10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN
+10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY
+10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O
+10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO
+10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
+10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
+10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE
+10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP
+10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP
+10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER
+10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER
+10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES
+10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ
+10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET
+10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT
+10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY
+10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH
+10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U
+10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU
+10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
+10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
+10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV
+10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ
+10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
+10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
+10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
+118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
+118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
+118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
+118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
+118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
+118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
+118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
+118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
+118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
+118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
+118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
+118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
+118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
+118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
+118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
+118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
+118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
+118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
+118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
+118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
+118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
+118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
+118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
+118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
+118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
+118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
+118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
+118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
+118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
+118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
+118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
+118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
+16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M
+16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S
+16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V
+16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W
+16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU
+16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z
+16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP
+16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P
+16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T
+16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G
+16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F
+16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I
+16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K
+16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A
+16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J
+16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E
+16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B
+16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C
+16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U
+16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU
+16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L
+16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q
+16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP
+16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY
+16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X
+16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D
+16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE
+16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N
+16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R
+16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
+16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
+16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
+1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
+1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
+1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
+1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
+1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
+1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
+1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
+1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
+1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
+1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
+1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
+1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
+1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
+1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
+1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
+1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
+1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
+1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
+1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
+1E913; C; 1E935; # ADLAM CAPITAL LETTER U
+1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
+1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
+1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
+1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
+1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
+1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
+1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
+1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
+1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
+1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
+1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
+1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
+1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
+1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
+'''
+_map = dict(
+    (unichr(int(from_, 16)), ''.join((unichr(int(v, 16)) for v in to_.split(' '))))
+    for from_, type_, to_, _ in (
+        l.split('; ', 3) for l in _map_str.splitlines() if l)
+    if type_ in ('C', 'F'))
+del _map_str
+
+def casefold(s):
+    assert isinstance(s, compat_str)
+    return ''.join((_map.get(c, c) for c in s))
+
+__all__ = [
+    casefold
+]

From f102e3dc4efe27e6c8697d6d117f05d1bb6d1b91 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Oct 2022 21:27:14 +0000
Subject: [PATCH 140/312] [compat] Add compat_casefold and compat_re_Match, for
 traverse_obj() port

---
 youtube_dl/compat.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index eca6d63de..4b5e1f6ed 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2375,8 +2375,10 @@ except ImportError:
 
 try:
     compat_str = unicode  # Python 2
+    from .casefold import casefold as compat_casefold
 except NameError:
     compat_str = str
+    compat_casefold = lambda s: s.casefold()
 
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
@@ -3066,6 +3068,9 @@ except ImportError:
 
 # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
 compat_re_Pattern = type(re.compile(''))
+# and on the type of a match
+compat_re_Match = type(re.match('a', 'a'))
+
 
 if sys.version_info < (3, 3):
     def compat_b64decode(s, *args, **kwargs):
@@ -3101,6 +3106,7 @@ __all__ = [
     'compat_Struct',
     'compat_b64decode',
     'compat_basestring',
+    'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
     'compat_collections_chain_map',
@@ -3132,6 +3138,7 @@ __all__ = [
     'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_re_Match',
     'compat_re_Pattern',
     'compat_realpath',
     'compat_setenv',

From b7c25959f0f76aad4ee24e254f82e6c5cca2c1ff Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Nov 2022 12:40:23 +0000
Subject: [PATCH 141/312] [compat] Unify unicode/str compat and move up

---
 youtube_dl/compat.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 4b5e1f6ed..28942a8c1 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,19 @@ import subprocess
 import sys
 import xml.etree.ElementTree
 
+# deal with critical unicode/str things first
+try:
+    # Python 2
+    compat_str, compat_basestring, compat_chr = (
+        unicode, basestring, unichr
+    )
+    from .casefold import casefold as compat_casefold
+except NameError:
+    compat_str, compat_basestring, compat_chr = (
+        str, str, chr
+    )
+    compat_casefold = lambda s: s.casefold()
+
 try:
     import collections.abc as compat_collections_abc
 except ImportError:
@@ -2373,13 +2386,6 @@ try:
 except ImportError:
     import BaseHTTPServer as compat_http_server
 
-try:
-    compat_str = unicode  # Python 2
-    from .casefold import casefold as compat_casefold
-except NameError:
-    compat_str = str
-    compat_casefold = lambda s: s.casefold()
-
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
@@ -2510,22 +2516,11 @@ except ImportError:  # Python < 3.4
 
             return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
 
-try:
-    compat_basestring = basestring  # Python 2
-except NameError:
-    compat_basestring = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
 try:
     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 
-
 etree = xml.etree.ElementTree
 
 

From a874871801b8b05d06e8ffe52bed94fdfc26611e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Nov 2022 19:25:59 +0000
Subject: [PATCH 142/312] [compat] Reformat casefold.py for easier updating

---
 youtube_dl/casefold.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
index 546269a3c..7e91c3811 100644
--- a/youtube_dl/casefold.py
+++ b/youtube_dl/casefold.py
@@ -1,8 +1,20 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .compat import compat_str
+from .compat import (
+    compat_str,
+    compat_chr,
+)
 
+# Below is included the text of icu/CaseFolding.txt retrieved from
+# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
+# In case newly foldable Unicode characters are defined, paste the new version
+# of the text inside the ''' marks.
+# The text is expected to have only blank lines andlines with 1st character #,
+# all ignored, and fold definitions like this:
+# `from_hex_code; space_separated_to_hex_code_list; comment`
+
+_map_str = '''
 # CaseFolding-15.0.0.txt
 # Date: 2022-02-02, 23:35:35 GMT
 # © 2022 Unicode®, Inc.
@@ -65,7 +77,6 @@ from .compat import compat_str
 #  have the value C for the status field, and the code point itself for the mapping field.
 
 # =================================================================
-_map_str = '''
 0041; C; 0061; # LATIN CAPITAL LETTER A
 0042; C; 0062; # LATIN CAPITAL LETTER B
 0043; C; 0063; # LATIN CAPITAL LETTER C
@@ -1627,17 +1638,22 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
 '''
+
+_parse_unichr = lambda s: compat_chr(int(s, 16))
+
 _map = dict(
-    (unichr(int(from_, 16)), ''.join((unichr(int(v, 16)) for v in to_.split(' '))))
+    (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' '))))
     for from_, type_, to_, _ in (
-        l.split('; ', 3) for l in _map_str.splitlines() if l)
+        l.split('; ', 3) for l in _map_str.splitlines() if l and not l[0] == '#')
     if type_ in ('C', 'F'))
 del _map_str
 
+
 def casefold(s):
     assert isinstance(s, compat_str)
     return ''.join((_map.get(c, c) for c in s))
 
+
 __all__ = [
     casefold
 ]

From 65ccb0dd4eb52cced7d0e11af021c09dbe2aed4a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Nov 2022 21:33:39 +0000
Subject: [PATCH 143/312] [compat] Add test for compat_casefold()

---
 test/test_compat.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 86ff389fd..05995372a 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -118,9 +118,21 @@ class TestCompat(unittest.TestCase):
 <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
         compat_etree_fromstring(xml)
 
-    def test_struct_unpack(self):
+    def test_compat_struct_unpack(self):
         self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
 
+    def test_compat_casefold(self):
+        if hasattr(compat_str, 'casefold'):
+            # don't bother to test str.casefold() (again)
+            return
+        # thanks https://bugs.python.org/file24232/casefolding.patch
+        self.assertEqual(compat_casefold('hello'), 'hello')
+        self.assertEqual(compat_casefold('hELlo'), 'hello')
+        self.assertEqual(compat_casefold('ß'), 'ss')
+        self.assertEqual(compat_casefold('ﬁ'), 'fi')
+        self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
+        self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
+
 
 if __name__ == '__main__':
     unittest.main()

From 087ddc237132103859cc00183d8d70bd75c0e44e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 1 Nov 2022 22:47:02 +0000
Subject: [PATCH 144/312] [compat] Add test for compat_casefold()

---
 test/test_compat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_compat.py b/test/test_compat.py
index 05995372a..0986cff37 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 from youtube_dl.compat import (
+    compat_casefold,
     compat_getenv,
     compat_setenv,
     compat_etree_Element,

From c4b19a88169fa76c5eb665d274e7270a0fe452c4 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 2 Nov 2022 11:56:26 +0000
Subject: [PATCH 145/312] [compat] Work around in case folding for narrow
 Python build

Resolves #31324.
---
 youtube_dl/casefold.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
index 7e91c3811..748c2d491 100644
--- a/youtube_dl/casefold.py
+++ b/youtube_dl/casefold.py
@@ -1639,7 +1639,15 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
 '''
 
-_parse_unichr = lambda s: compat_chr(int(s, 16))
+
+def _parse_unichr(s):
+    s = int(s, 16)
+    try:
+        return compat_chr(s)
+    except ValueError:
+        # work around "unichr() arg not in range(0x10000) (narrow Python build)"
+        return ('\\U%08x' % s).decode('unicode-escape')
+
 
 _map = dict(
     (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' '))))

From 27ed77aabba8c9eb08d66f34092b1bfcc22c482e Mon Sep 17 00:00:00 2001
From: Andrei Lebedev <lebdron@gmail.com>
Date: Thu, 3 Nov 2022 11:09:37 +0100
Subject: [PATCH 146/312] [utils] Backport traverse_obj (etc) from yt-dlp
 (#31156)

* Backport traverse_obj and closely related function from yt-dlp (code by pukkandan)
* Backport LazyList, variadic(), try_call (code by pukkandan)
* Recast using yt-dlp's newer traverse_obj() implementation and tests (code by grub4k)
* Add tests for Unicode case folding support matching Py3.5+ (requires f102e3d)
* Improve/add tests for variadic, try_call, join_nonempty

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 test/test_utils.py  | 323 +++++++++++++++++++++++++++++++++++++++++
 youtube_dl/utils.py | 339 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 662 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index f1a748dde..9d364c863 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -12,7 +12,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # Various small unit tests
 import io
+import itertools
 import json
+import re
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
@@ -40,11 +42,14 @@ from youtube_dl.utils import (
     get_element_by_attribute,
     get_elements_by_class,
     get_elements_by_attribute,
+    get_first,
     InAdvancePagedList,
     int_or_none,
     intlist_to_bytes,
     is_html,
+    join_nonempty,
     js_to_json,
+    LazyList,
     limit_length,
     merge_dicts,
     mimetype2ext,
@@ -79,6 +84,8 @@ from youtube_dl.utils import (
     strip_or_none,
     subtitles_filename,
     timeconvert,
+    traverse_obj,
+    try_call,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -92,6 +99,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     urshift,
     update_url_query,
+    variadic,
     version_tuple,
     xpath_with_ns,
     xpath_element,
@@ -112,12 +120,18 @@ from youtube_dl.compat import (
     compat_getenv,
     compat_os_name,
     compat_setenv,
+    compat_str,
     compat_urlparse,
     compat_parse_qs,
 )
 
 
 class TestUtil(unittest.TestCase):
+
+    # yt-dlp shim
+    def assertCountEqual(self, expected, got, msg='count should be the same'):
+        return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
+
     def test_timeconvert(self):
         self.assertTrue(timeconvert('') is None)
         self.assertTrue(timeconvert('bougrg') is None)
@@ -1478,6 +1492,315 @@ Line 1
         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
 
+    def test_LazyList(self):
+        it = list(range(10))
+
+        self.assertEqual(list(LazyList(it)), it)
+        self.assertEqual(LazyList(it).exhaust(), it)
+        self.assertEqual(LazyList(it)[5], it[5])
+
+        self.assertEqual(LazyList(it)[5:], it[5:])
+        self.assertEqual(LazyList(it)[:5], it[:5])
+        self.assertEqual(LazyList(it)[::2], it[::2])
+        self.assertEqual(LazyList(it)[1::2], it[1::2])
+        self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+        self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+        self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+        self.assertTrue(LazyList(it))
+        self.assertFalse(LazyList(range(0)))
+        self.assertEqual(len(LazyList(it)), len(it))
+        self.assertEqual(repr(LazyList(it)), repr(it))
+        self.assertEqual(compat_str(LazyList(it)), compat_str(it))
+
+        self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+        self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+        self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+    def test_LazyList_laziness(self):
+
+        def test(ll, idx, val, cache):
+            self.assertEqual(ll[idx], val)
+            self.assertEqual(ll._cache, list(cache))
+
+        ll = LazyList(range(10))
+        test(ll, 0, 0, range(1))
+        test(ll, 5, 5, range(6))
+        test(ll, -3, 7, range(10))
+
+        ll = LazyList(range(10), reverse=True)
+        test(ll, -1, 0, range(1))
+        test(ll, 3, 6, range(10))
+
+        ll = LazyList(itertools.count())
+        test(ll, 10, 10, range(11))
+        ll = reversed(ll)
+        test(ll, -15, 14, range(15))
+
+    def test_try_call(self):
+        def total(*x, **kwargs):
+            return sum(x) + sum(kwargs.values())
+
+        self.assertEqual(try_call(None), None,
+                         msg='not a fn should give None')
+        self.assertEqual(try_call(lambda: 1), 1,
+                         msg='int fn with no expected_type should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+                         msg='int fn with expected_type int should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with wrong expected_type should give None')
+        self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+                         msg='fn should accept arglist')
+        self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+                         msg='fn should accept kwargs')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with no expected_type should give None')
+        self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+                         msg='expect first int result with expected_type int')
+
+    def test_variadic(self):
+        self.assertEqual(variadic(None), (None, ))
+        self.assertEqual(variadic('spam'), ('spam', ))
+        self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+
+    def test_traverse_obj(self):
+        _TEST_DATA = {
+            100: 100,
+            1.2: 1.2,
+            'str': 'str',
+            'None': None,
+            '...': Ellipsis,
+            'urls': [
+                {'index': 0, 'url': 'https://www.example.com/0'},
+                {'index': 1, 'url': 'https://www.example.com/1'},
+            ],
+            'data': (
+                {'index': 2},
+                {'index': 3},
+            ),
+            'dict': {},
+        }
+
+        # Test base functionality
+        self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+                         msg='allow tuple path')
+        self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+                         msg='allow list path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+                         msg='allow iterable path')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+                         msg='single items should be treated as a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+        self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+        self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+        # Test Ellipsis behavior
+        self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
+                              (item for item in _TEST_DATA.values() if item is not None),
+                              msg='`...` should give all values except `None`')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
+                              msg='`...` selection for dicts should select all values')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='nested `...` queries should work')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
+                              msg='`...` query result should be flattened')
+
+        # Test function as key
+        self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+                         [_TEST_DATA['urls']],
+                         msg='function as query key should perform a filter based on (key, value)')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
+                              msg='exceptions in the query function should be caught')
+
+        # Test alternative paths
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+                         msg='multiple `paths` should be treated as alternative paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+                         msg='alternatives should exit early')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+                         msg='alternatives should return `default` if exhausted')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
+                         msg='alternatives should track their own branching return')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
+                         msg='alternatives on empty objects should search further')
+
+        # Test branch and path nesting
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+                         msg='tuple as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+                         msg='list as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+                         msg='double nesting in path should be treated as paths')
+        self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+                         msg='do not fail early on branching')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+                              ['https://www.example.com/0', 'https://www.example.com/1'],
+                              msg='triple nesting in path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='ellipsis as branch path start gets flattened')
+
+        # Test dictionary as key
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+                         msg='dict key should result in a dict with the same keys')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+                         {0: 'https://www.example.com/0'},
+                         msg='dict key should allow paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+                         {0: ['https://www.example.com/0']},
+                         msg='tuple in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+                         {0: ['https://www.example.com/0']},
+                         msg='double nesting in dict path should be treated as paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+                         {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+                         msg='triple nesting in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+                         msg='remove `None` values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='do not remove `None` values if `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
+                         msg='do not remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
+                         msg='do not remove empty values when dict key and a default')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
+                         msg='if branch in dict key not successful, return `[]`')
+
+        # Testing default parameter behavior
+        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+                         msg='default value should be `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
+                         msg='chained fails should result in default')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+                         msg='should not short cirquit on `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+                         msg='invalid dict key should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+                         msg='`None` is a deliberate sentinel and should become `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+                         msg='`IndexError` should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
+                         msg='if branched but not successful return `default` if defined, not `[]`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
+                         msg='if branched but not successful return `default` even if `default` is `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
+                         msg='if branched but not successful return `[]`, not `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
+                         msg='if branched but object is empty return `[]`, not `default`')
+
+        # Testing expected_type behavior
+        _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
+                         msg='accept matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
+                         msg='reject non matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
+                         msg='transform type using type function')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
+                                      expected_type=lambda _: 1 / 0), None,
+                         msg='wrap expected_type function in try_call')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
+                         msg='eliminate items that expected_type fails on')
+
+        # Test get_all behavior
+        _GET_ALL_DATA = {'key': [0, 1, 2]}
+        self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
+                         msg='if not `get_all`, return only first matching value')
+        self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
+                         msg='do not overflatten if not `get_all`')
+
+        # Test casesense behavior
+        _CASESENSE_DATA = {
+            'KeY': 'value0',
+            0: {
+                'KeY': 'value1',
+                0: {'KeY': 'value2'},
+            },
+            # FULLWIDTH LATIN CAPITAL LETTER K
+            '\uff2bey': 'value3',
+        }
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
+                         msg='dict keys should be case sensitive unless `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
+                                      casesense=False), 'value0',
+                         msg='allow non matching key case if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey',  # FULLWIDTH LATIN SMALL LETTER K
+                                      casesense=False), 'value3',
+                         msg='allow non matching Unicode key case if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
+                                      casesense=False), ['value1'],
+                         msg='allow non matching key case in branch if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
+                                      casesense=False), ['value2'],
+                         msg='allow non matching key case in branch path if `casesense`')
+
+        # Test traverse_string behavior
+        _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
+                         msg='do not traverse into string if not `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
+                                      _traverse_string=True), 's',
+                         msg='traverse into string if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
+                                      _traverse_string=True), '.',
+                         msg='traverse into converted data if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
+                                      _traverse_string=True), list('str'),
+                         msg='`...` branching into string should result in list')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
+                                      _traverse_string=True), ['s', 'r'],
+                         msg='branching into string should result in list')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
+                                      _traverse_string=True), list('str'),
+                         msg='function branching into string should result in list')
+
+        # Test is_user_input behavior
+        _IS_USER_INPUT_DATA = {'range8': list(range(8))}
+        self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
+                                      _is_user_input=True), 3,
+                         msg='allow for string indexing if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
+                                           _is_user_input=True), tuple(range(8))[3:],
+                              msg='allow for string slice if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
+                                           _is_user_input=True), tuple(range(8))[:4:2],
+                              msg='allow step in string slice if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
+                                           _is_user_input=True), range(8),
+                              msg='`:` should be treated as `...` if `is_user_input`')
+        with self.assertRaises(TypeError, msg='too many params should result in error'):
+            traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
+
+        # Test re.Match as input obj
+        mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
+        self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
+                         msg='`...` on a `re.Match` should give its `groups()`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+                         msg='function on a `re.Match` should give groupno, value starting at 0')
+        self.assertEqual(traverse_obj(mobj, 'group'), '3',
+                         msg='str key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 2), '3',
+                         msg='int key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+                         msg='str key on a `re.Match` should respect casesense')
+        self.assertEqual(traverse_obj(mobj, 'fail'), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 8), None,
+                         msg='failing int key on a `re.Match` should return `default`')
+
+    def test_get_first(self):
+        self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
+
+    def test_join_nonempty(self):
+        self.assertEqual(join_nonempty('a', 'b'), 'a-b')
+        self.assertEqual(join_nonempty(
+            'a', 'b', 'c', 'd',
+            from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 23a65a81c..e3c3ccff9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -43,6 +43,7 @@ from .compat import (
     compat_HTTPError,
     compat_basestring,
     compat_chr,
+    compat_collections_abc,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
     compat_etree_fromstring,
@@ -1685,6 +1686,7 @@ USER_AGENTS = {
 
 
 NO_DEFAULT = object()
+IDENTITY = lambda x: x
 
 ENGLISH_MONTH_NAMES = [
     'January', 'February', 'March', 'April', 'May', 'June',
@@ -3867,6 +3869,105 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
         return unrecognized
 
 
+class LazyList(compat_collections_abc.Sequence):
+    """Lazy immutable list from an iterable
+    Note that slices of a LazyList are lists and not LazyList"""
+
+    class IndexError(IndexError):
+        def __init__(self, cause=None):
+            if cause:
+                # reproduce `raise from`
+                self.__cause__ = cause
+            super(IndexError, self).__init__()
+
+    def __init__(self, iterable, **kwargs):
+        # kwarg-only
+        reverse = kwargs.get('reverse', False)
+        _cache = kwargs.get('_cache')
+
+        self._iterable = iter(iterable)
+        self._cache = [] if _cache is None else _cache
+        self._reversed = reverse
+
+    def __iter__(self):
+        if self._reversed:
+            # We need to consume the entire iterable to iterate in reverse
+            for item in self.exhaust():
+                yield item
+            return
+        for item in self._cache:
+            yield item
+        for item in self._iterable:
+            self._cache.append(item)
+            yield item
+
+    def _exhaust(self):
+        self._cache.extend(self._iterable)
+        self._iterable = []  # Discard the emptied iterable to make it pickle-able
+        return self._cache
+
+    def exhaust(self):
+        """Evaluate the entire iterable"""
+        return self._exhaust()[::-1 if self._reversed else 1]
+
+    @staticmethod
+    def _reverse_index(x):
+        return None if x is None else ~x
+
+    def __getitem__(self, idx):
+        if isinstance(idx, slice):
+            if self._reversed:
+                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
+            start, stop, step = idx.start, idx.stop, idx.step or 1
+        elif isinstance(idx, int):
+            if self._reversed:
+                idx = self._reverse_index(idx)
+            start, stop, step = idx, idx, 0
+        else:
+            raise TypeError('indices must be integers or slices')
+        if ((start or 0) < 0 or (stop or 0) < 0
+                or (start is None and step < 0)
+                or (stop is None and step > 0)):
+            # We need to consume the entire iterable to be able to slice from the end
+            # Obviously, never use this with infinite iterables
+            self._exhaust()
+            try:
+                return self._cache[idx]
+            except IndexError as e:
+                raise self.IndexError(e)
+        n = max(start or 0, stop or 0) - len(self._cache) + 1
+        if n > 0:
+            self._cache.extend(itertools.islice(self._iterable, n))
+        try:
+            return self._cache[idx]
+        except IndexError as e:
+            raise self.IndexError(e)
+
+    def __bool__(self):
+        try:
+            self[-1] if self._reversed else self[0]
+        except self.IndexError:
+            return False
+        return True
+
+    def __len__(self):
+        self._exhaust()
+        return len(self._cache)
+
+    def __reversed__(self):
+        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
+
+    def __copy__(self):
+        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
+
+    def __repr__(self):
+        # repr and str should mimic a list. So we exhaust the iterable
+        return repr(self.exhaust())
+
+    def __str__(self):
+        return repr(self.exhaust())
+
+
 class PagedList(object):
     def __len__(self):
         # This is only useful for tests
@@ -4092,6 +4193,10 @@ def multipart_encode(data, boundary=None):
     return out, content_type
 
 
+def variadic(x, allowed_types=(compat_str, bytes, dict)):
+    return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+
+
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     if isinstance(key_or_keys, (list, tuple)):
         for key in key_or_keys:
@@ -4102,6 +4207,23 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     return d.get(key_or_keys, default)
 
 
+def try_call(*funcs, **kwargs):
+
+    # parameter defaults
+    expected_type = kwargs.get('expected_type')
+    fargs = kwargs.get('args', [])
+    fkwargs = kwargs.get('kwargs', {})
+
+    for f in funcs:
+        try:
+            val = f(*fargs, **fkwargs)
+        except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
+            pass
+        else:
+            if expected_type is None or isinstance(val, expected_type):
+                return val
+
+
 def try_get(src, getter, expected_type=None):
     if not isinstance(getter, (list, tuple)):
         getter = [getter]
@@ -5835,3 +5957,220 @@ def clean_podcast_url(url):
                 st\.fm # https://podsights.com/docs/
             )/e
         )/''', '', url)
+
+
+def traverse_obj(obj, *paths, **kwargs):
+    """
+    Safely traverse nested `dict`s and `Sequence`s
+
+    >>> obj = [{}, {"key": "value"}]
+    >>> traverse_obj(obj, (1, "key"))
+    "value"
+
+    Each of the provided `paths` is tested and the first producing a valid result will be returned.
+    The next path will also be tested if the path branched but no results could be found.
+    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
+    A value of None is treated as the absence of a value.
+
+    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
+
+    The keys in the path can be one of:
+        - `None`:           Return the current object.
+        - `str`/`int`:      Return `obj[key]`. For `re.Match, return `obj.group(key)`.
+        - `slice`:          Branch out and return all values in `obj[key]`.
+        - `Ellipsis`:       Branch out and return a list of all values.
+        - `tuple`/`list`:   Branch out and return a list of all matching values.
+                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
+        - `function`:       Branch out and return values filtered by the function.
+                            Read as: `[value for key, value in obj if function(key, value)]`.
+                            For `Sequence`s, `key` is the index of the value.
+        - `dict`            Transform the current object and return a matching dict.
+                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+
+        `tuple`, `list`, and `dict` all support nested paths and branches.
+
+    @params paths           Paths which to traverse by.
+    Keyword arguments:
+    @param default          Value to return if the paths do not match.
+    @param expected_type    If a `type`, only accept final values of this type.
+                            If any other callable, try to call the function on each result.
+    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
+    @param casesense        If `False`, consider string dictionary keys as case insensitive.
+
+    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
+
+    @param _is_user_input    Whether the keys are generated from user input.
+                            If `True` strings get converted to `int`/`slice` if needed.
+    @param _traverse_string  Whether to traverse into objects as strings.
+                            If `True`, any non-compatible object will first be
+                            converted into a string and then traversed into.
+
+
+    @returns                The result of the object traversal.
+                            If successful, `get_all=True`, and the path branches at least once,
+                            then a list of results is returned instead.
+                            A list is always returned if the last path branches and no `default` is given.
+    """
+
+    # parameter defaults
+    default = kwargs.get('default', NO_DEFAULT)
+    expected_type = kwargs.get('expected_type')
+    get_all = kwargs.get('get_all', True)
+    casesense = kwargs.get('casesense', True)
+    _is_user_input = kwargs.get('_is_user_input', False)
+    _traverse_string = kwargs.get('_traverse_string', False)
+
+    # instant compat
+    str = compat_str
+
+    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
+    # stand-in until compat_re_Match is added
+    compat_re_Match = type(re.match('a', 'a'))
+    # stand-in until casefold.py is added
+    try:
+        ''.casefold()
+        compat_casefold = lambda s: s.casefold()
+    except AttributeError:
+        compat_casefold = lambda s: s.lower()
+    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
+
+    if isinstance(expected_type, type):
+        type_test = lambda val: val if isinstance(val, expected_type) else None
+    else:
+        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
+
+    def from_iterable(iterables):
+        # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
+        for it in iterables:
+            for item in it:
+                yield item
+
+    def apply_key(key, obj):
+        if obj is None:
+            return
+
+        elif key is None:
+            yield obj
+
+        elif isinstance(key, (list, tuple)):
+            for branch in key:
+                _, result = apply_path(obj, branch)
+                for item in result:
+                    yield item
+
+        elif key is Ellipsis:
+            result = []
+            if isinstance(obj, compat_collections_abc.Mapping):
+                result = obj.values()
+            elif is_sequence(obj):
+                result = obj
+            elif isinstance(obj, compat_re_Match):
+                result = obj.groups()
+            elif _traverse_string:
+                result = str(obj)
+            for item in result:
+                yield item
+
+        elif callable(key):
+            if is_sequence(obj):
+                iter_obj = enumerate(obj)
+            elif isinstance(obj, compat_collections_abc.Mapping):
+                iter_obj = obj.items()
+            elif isinstance(obj, compat_re_Match):
+                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
+            elif _traverse_string:
+                iter_obj = enumerate(str(obj))
+            else:
+                return
+            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
+                yield item
+
+        elif isinstance(key, dict):
+            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
+            yield dict((k, v if v is not None else default) for k, v in iter_obj
+                       if v is not None or default is not NO_DEFAULT)
+
+        elif isinstance(obj, compat_collections_abc.Mapping):
+            yield (obj.get(key) if casesense or (key in obj)
+                   else next((v for k, v in obj.items() if casefold(k) == key), None))
+
+        elif isinstance(obj, compat_re_Match):
+            if isinstance(key, int) or casesense:
+                try:
+                    yield obj.group(key)
+                    return
+                except IndexError:
+                    pass
+            if not isinstance(key, str):
+                return
+
+            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+
+        else:
+            if _is_user_input:
+                key = (int_or_none(key) if ':' not in key
+                       else slice(*map(int_or_none, key.split(':'))))
+
+            if not isinstance(key, (int, slice)):
+                return
+
+            if not is_sequence(obj):
+                if not _traverse_string:
+                    return
+                obj = str(obj)
+
+            try:
+                yield obj[key]
+            except IndexError:
+                pass
+
+    def apply_path(start_obj, path):
+        objs = (start_obj,)
+        has_branched = False
+
+        for key in variadic(path):
+            if _is_user_input and key == ':':
+                key = Ellipsis
+
+            if not casesense and isinstance(key, str):
+                key = compat_casefold(key)
+
+            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
+                has_branched = True
+
+            key_func = functools.partial(apply_key, key)
+            objs = from_iterable(map(key_func, objs))
+
+        return has_branched, objs
+
+    def _traverse_obj(obj, path, use_list=True):
+        has_branched, results = apply_path(obj, path)
+        results = LazyList(x for x in map(type_test, results) if x is not None)
+
+        if get_all and has_branched:
+            return results.exhaust() if results or use_list else None
+
+        return results[0] if results else None
+
+    for index, path in enumerate(paths, 1):
+        use_list = default is NO_DEFAULT and index == len(paths)
+        result = _traverse_obj(obj, path, use_list)
+        if result is not None:
+            return result
+
+    return None if default is NO_DEFAULT else default
+
+
+def get_first(obj, keys, **kwargs):
+    return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
+
+
+def join_nonempty(*values, **kwargs):
+
+    # parameter defaults
+    delim = kwargs.get('delim', '-')
+    from_dict = kwargs.get('from_dict')
+
+    if from_dict is not None:
+        values = (traverse_obj(from_dict, variadic(v)) for v in values)
+    return delim.join(map(compat_str, filter(None, values)))

From de39d1281cea499cb1adfce5ff7e0a56f1bad5fe Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Nov 2022 10:13:07 +0000
Subject: [PATCH 147/312] [extractor/ceskatelevize] Back-port extractor from
 yt-dlp, etc (#30713)

* back-port extractor, removing CeskaTelevizePoradyIE
* follow redirect URL
* support liveBroadcast and videobonusDetail in __NEXT__ data
* return single video for singleton playlist
* fix/add tests
---
 youtube_dl/extractor/ceskatelevize.py | 178 ++++++++++++++------------
 youtube_dl/extractor/extractors.py    |   5 +-
 2 files changed, 96 insertions(+), 87 deletions(-)

diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 7cb4efb74..fe677d8e8 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -12,35 +12,21 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
-    update_url_query,
+    str_or_none,
+    traverse_obj,
     urlencode_postdata,
     USER_AGENTS,
 )
 
 
 class CeskaTelevizeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
     _TESTS = [{
-        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
-        'info_dict': {
-            'id': '61924494877246241',
-            'ext': 'mp4',
-            'title': 'Hyde Park Civilizace: Život v Grónsku',
-            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 3350,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
         'info_dict': {
             'id': '61924494877028507',
             'ext': 'mp4',
-            'title': 'Hyde Park Civilizace: Bonus 01 - En',
+            'title': 'Bonus 01 - En - Hyde Park Civilizace',
             'description': 'English Subtittles',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 81.3,
@@ -51,31 +37,111 @@ class CeskaTelevizeIE(InfoExtractor):
         },
     }, {
         # live stream
-        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
         'info_dict': {
-            'id': 402,
+            'id': '102',
             'ext': 'mp4',
-            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'title': r'ČT1 - živé vysílání online',
+            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
             'is_live': True,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
-        'skip': 'Georestricted to Czech Republic',
+    }, {
+        # another
+        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+        'only_matching': True,
+        'info_dict': {
+            'id': 402,
+            'ext': 'mp4',
+            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'is_live': True,
+        },
+        # 'skip': 'Georestricted to Czech Republic',
     }, {
         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
         'only_matching': True,
+    }, {
+        # video with 18+ caution trailer
+        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
+        'info_dict': {
+            'id': '215562210900007-bogotart',
+            'title': 'Bogotart - Queer',
+            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': '61924494877311053',
+                'ext': 'mp4',
+                'title': 'Bogotart - Queer (Varování 18+)',
+                'duration': 11.9,
+            },
+        }, {
+            'info_dict': {
+                'id': '61924494877068022',
+                'ext': 'mp4',
+                'title': 'Bogotart - Queer (Queer)',
+                'thumbnail': r're:^https?://.*\.jpg',
+                'duration': 1558.3,
+            },
+        }],
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # iframe embed
+        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+        'only_matching': True,
     }]
 
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
+        webpage, urlh = self._download_webpage_handle(url, playlist_id)
+        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
+        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
+        playlist_title = self._og_search_title(webpage, default=None)
+        if site_name and playlist_title:
+            playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
+        playlist_description = self._og_search_description(webpage, default=None)
+        if playlist_description:
+            playlist_description = playlist_description.replace('\xa0', ' ')
 
-        webpage = self._download_webpage(url, playlist_id)
+        type_ = 'IDEC'
+        if re.search(r'(^/porady|/zive)/', parsed_url.path):
+            next_data = self._search_nextjs_data(webpage, playlist_id)
+            if '/zive/' in parsed_url.path:
+                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
+            else:
+                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+                if not idec:
+                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
+                    if idec:
+                        type_ = 'bonus'
+            if not idec:
+                raise ExtractorError('Failed to find IDEC id')
+            iframe_hash = self._download_webpage(
+                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
+                playlist_id, note='Getting IFRAME hash')
+            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
+            webpage = self._download_webpage(
+                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
+                playlist_id, note='Downloading player', query=query)
 
         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
         if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
-            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
+        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
+            raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
 
         type_ = None
         episode_id = None
@@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
         data = {
             'playlist[0][type]': type_,
             'playlist[0][id]': episode_id,
-            'requestUrl': compat_urllib_parse_urlparse(url).path,
+            'requestUrl': parsed_url.path,
             'requestSource': 'iVysilani',
         }
 
@@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):
 
         for user_agent in (None, USER_AGENTS['Safari']):
             req = sanitized_Request(
-                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                 data=urlencode_postdata(data))
 
             req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
             req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
             req.add_header('Referer', url)
 
-            playlist_title = self._og_search_title(webpage, default=None)
-            playlist_description = self._og_search_description(webpage, default=None)
-
             playlist = self._download_json(req, playlist_id, fatal=False)
             if not playlist:
                 continue
@@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
                     entries[num]['formats'].extend(formats)
                     continue
 
-                item_id = item.get('id') or item['assetId']
+                item_id = str_or_none(item.get('id') or item['assetId'])
                 title = item['title']
 
                 duration = float_or_none(item.get('duration'))
@@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):
 
                 if playlist_len == 1:
                     final_title = playlist_title or title
-                    if is_live:
-                        final_title = self._live_title(final_title)
                 else:
                     final_title = '%s (%s)' % (playlist_title, title)
 
@@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
         for e in entries:
             self._sort_formats(e['formats'])
 
+        if len(entries) == 1:
+            return entries[0]
         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 
     def _get_subtitles(self, episode_id, subs):
@@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
                     yield line
 
         return '\r\n'.join(_fix_subtitle(subtitles))
-
-
-class CeskaTelevizePoradyIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
-    _TESTS = [{
-        # video with 18+ caution trailer
-        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
-        'info_dict': {
-            'id': '215562210900007-bogotart',
-            'title': 'Queer: Bogotart',
-            'description': 'Alternativní průvodce současným queer světem',
-        },
-        'playlist': [{
-            'info_dict': {
-                'id': '61924494876844842',
-                'ext': 'mp4',
-                'title': 'Queer: Bogotart (Varování 18+)',
-                'duration': 10.2,
-            },
-        }, {
-            'info_dict': {
-                'id': '61924494877068022',
-                'ext': 'mp4',
-                'title': 'Queer: Bogotart (Queer)',
-                'thumbnail': r're:^https?://.*\.jpg',
-                'duration': 1558.3,
-            },
-        }],
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        # iframe embed
-        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        data_url = update_url_query(unescapeHTML(self._search_regex(
-            (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-             r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
-            webpage, 'iframe player url', group='url')), query={
-                'autoStart': 'true',
-        })
-
-        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 751fc38b6..e36f86be4 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -208,10 +208,7 @@ from .ccc import (
 from .ccma import CCMAIE
 from .cctv import CCTVIE
 from .cda import CDAIE
-from .ceskatelevize import (
-    CeskaTelevizeIE,
-    CeskaTelevizePoradyIE,
-)
+from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .charlierose import CharlieRoseIE
 from .chaturbate import ChaturbateIE

From 47e70fff8ba3de769a31fab0b3572162094733f7 Mon Sep 17 00:00:00 2001
From: Moises Lima <mozlima@users.noreply.github.com>
Date: Wed, 9 Nov 2022 17:26:30 -0300
Subject: [PATCH 148/312] [PeekVids, PlayVids] Add new extractor (#29765)

* Merge back-port from yt-dlp
* Merge features from PR #29798
* Improve metadata extraction

Co-authored-by: dirkf <fieldhouse@gmx.net>
Co-authored by: AXDOOMER
---
 youtube_dl/extractor/extractors.py |   4 +
 youtube_dl/extractor/peekvids.py   | 193 +++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 youtube_dl/extractor/peekvids.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e36f86be4..4d9f37424 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -909,6 +909,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
 from .pearvideo import PearVideoIE
+from .peekvids import (
+    PeekVidsIE,
+    PlayVidsIE,
+)
 from .peertube import PeerTubeIE
 from .people import PeopleIE
 from .performgroup import PerformGroupIE
diff --git a/youtube_dl/extractor/peekvids.py b/youtube_dl/extractor/peekvids.py
new file mode 100644
index 000000000..c8aad564b
--- /dev/null
+++ b/youtube_dl/extractor/peekvids.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    get_element_by_class,
+    int_or_none,
+    merge_dicts,
+    url_or_none,
+)
+
+
+class PeekVidsIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?peekvids\.com/
+        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
+        (?P<id>[^/?&#]*)
+    '''
+    _TESTS = [{
+        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
+        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
+        'info_dict': {
+            'id': '1262717',
+            'display_id': 'BSyLMbN0YCd',
+            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+            'ext': 'mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
+            'timestamp': 1642579329,
+            'upload_date': '20220119',
+            'duration': 416,
+            'view_count': int,
+            'age_limit': 18,
+            'uploader': 'SEXYhub.com',
+            'categories': list,
+            'tags': list,
+        },
+    }]
+    _DOMAIN = 'www.peekvids.com'
+
+    def _get_detail(self, html):
+        return get_element_by_class('detail-video-block', html)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, expected_status=429)
+        if '>Rate Limit Exceeded' in webpage:
+            raise ExtractorError(
+                '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
+                expected=True)
+
+        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')
+
+        display_id = video_id
+        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
+        srcs = self._download_json(
+            'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
+            note='Downloading list of source files')
+        formats = [{
+            'url': f_url,
+            'format_id': f_id,
+            'height': int_or_none(f_id),
+        } for f_url, f_id in (
+            (url_or_none(f_v), f_match.group(1))
+            for f_v, f_match in (
+                (v, re.match(r'^data-src(\d{3,})$', k))
+                for k, v in srcs.items() if v) if f_match)
+            if f_url
+        ]
+        if not formats:
+            formats = [{'url': url} for url in srcs.values()]
+        self._sort_formats(formats)
+
+        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
+        info.pop('url', None)
+        # may not have found the thumbnail if it was in a list in the ld+json
+        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
+        detail = self._get_detail(webpage) or ''
+        info['description'] = self._html_search_regex(
+            r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description', '')), ),
+            detail, 'description', default=None) or None
+        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)
+
+        def cat_tags(name, html):
+            l = self._html_search_regex(
+                r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
+                html, name, default='')
+            return [x for x in re.split(r'\s+', l) if x]
+
+        return merge_dicts({
+            'id': video_id,
+            'display_id': display_id,
+            'age_limit': 18,
+            'formats': formats,
+            'categories': cat_tags('Categories', detail),
+            'tags': cat_tags('Tags', detail),
+            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
+        }, info)
+
+
+class PlayVidsIE(PeekVidsIE):
+    _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
+    _TESTS = [{
+        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
+        'md5': '2f12e50213dd65f142175da633c4564c',
+        'info_dict': {
+            'id': '1978030',
+            'display_id': 'U3pBrYhsjXM',
+            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+            'ext': 'mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
+            'timestamp': 1640435839,
+            'upload_date': '20211225',
+            'duration': 416,
+            'view_count': int,
+            'age_limit': 18,
+            'uploader': 'SEXYhub.com',
+            'categories': list,
+            'tags': list,
+        },
+    }, {
+        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
+        'md5': 'e783986e596cafbf46411a174ab42ba6',
+        'info_dict': {
+            'id': '762385',
+            'display_id': 'bKmGLe3IwjZ',
+            'ext': 'mp4',
+            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
+            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
+            'timestamp': 1516958544,
+            'upload_date': '20180126',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 480,
+            'uploader': 'Brazzers',
+            'age_limit': 18,
+            'view_count': int,
+            'age_limit': 18,
+            'categories': list,
+            'tags': list,
+        },
+    }, {
+        'url': 'https://www.playvids.com/v/47iUho33toY',
+        'md5': 'b056b5049d34b648c1e86497cf4febce',
+        'info_dict': {
+            'id': '700621',
+            'display_id': '47iUho33toY',
+            'ext': 'mp4',
+            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
+            'description': None,
+            'timestamp': 1507052209,
+            'upload_date': '20171003',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 332,
+            'uploader': 'Cacerenele',
+            'age_limit': 18,
+            'view_count': int,
+            'categories': list,
+            'tags': list,
+        }
+    }, {
+        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
+        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
+        'info_dict': {
+            'id': '1523518',
+            'display_id': 'z3_7iwWCmqt',
+            'ext': 'mp4',
+            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
+            'description': None,
+            'timestamp': 1607470323,
+            'upload_date': '20201208',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 593,
+            'uploader': 'yorours',
+            'age_limit': 18,
+            'view_count': int,
+            'categories': list,
+            'tags': list,
+        },
+    }]
+    _DOMAIN = 'www.playvids.com'
+
+    def _get_detail(self, html):
+        return get_element_by_class('detail-block', html)

From 604762a9f8fa21de3f7349bd612c4f34941a5d20 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 11 Nov 2022 00:49:13 +0000
Subject: [PATCH 149/312] [common:jwplayer] Improve jwplayer extraction and
 parsing (#31000)

* don't crash parser if jwplayer_data is invalid (empty, or no formats)
* use `label` in `sources[n]` as `format_id`
* relax `jwplayer().setup(...)` RE (also rework PR #27274 enhancement)
* detect more manifest formats in _parse_jwplayer_formats() (from PR #29596)
* improve metadata extraction (from PR #25433)
* remember URLs in a set
* use parse_resolution() in format
* extract filesize in format (from yt-dlp)

Co-authored-by: kikuyan <kikuyan@users.noreply.github.com>
Co-authored-by: martin54 <martin54@users.noreply.github.com>
---
 youtube_dl/extractor/common.py | 40 ++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1f33a1e06..a0a796d7b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -70,6 +70,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    try_get,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -2713,7 +2714,7 @@ class InfoExtractor(object):
 
     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
         mobj = re.search(
-            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
+            r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
             webpage)
         if mobj:
             try:
@@ -2734,9 +2735,14 @@ class InfoExtractor(object):
 
     def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True)
+        if flat_pl is None:
+            # not even a dict
+            return []
+
         # JWPlayer backward compatibility: flattened playlists
         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
-        if 'playlist' not in jwplayer_data:
+        if flat_pl is True:
             jwplayer_data = {'playlist': [jwplayer_data]}
 
         entries = []
@@ -2784,6 +2790,13 @@ class InfoExtractor(object):
                 'timestamp': int_or_none(video_data.get('pubdate')),
                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                 'subtitles': subtitles,
+                'alt_title': clean_html(video_data.get('subtitle')),  # attributes used e.g. by Tele5 ...
+                'genre': clean_html(video_data.get('genre')),
+                'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
+                'season_number': int_or_none(video_data.get('season')),
+                'episode_number': int_or_none(video_data.get('episode')),
+                'release_year': int_or_none(video_data.get('releasedate')),
+                'age_limit': int_or_none(video_data.get('age_restriction')),
             }
             # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
             if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
@@ -2792,7 +2805,9 @@ class InfoExtractor(object):
                     'url': formats[0]['url'],
                 })
             else:
-                self._sort_formats(formats)
+                # avoid exception in case of only sttls
+                if formats:
+                    self._sort_formats(formats)
                 entry['formats'] = formats
             entries.append(entry)
         if len(entries) == 1:
@@ -2802,7 +2817,7 @@ class InfoExtractor(object):
 
     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
-        urls = []
+        urls = set()
         formats = []
         for source in jwplayer_sources_data:
             if not isinstance(source, dict):
@@ -2811,14 +2826,14 @@ class InfoExtractor(object):
                 base_url, self._proto_relative_url(source.get('file')))
             if not source_url or source_url in urls:
                 continue
-            urls.append(source_url)
+            urls.add(source_url)
             source_type = source.get('type') or ''
             ext = mimetype2ext(source_type) or determine_ext(source_url)
-            if source_type == 'hls' or ext == 'm3u8':
+            if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
                 formats.extend(self._extract_m3u8_formats(
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id=m3u8_id, fatal=False))
-            elif source_type == 'dash' or ext == 'mpd':
+            elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
                 formats.extend(self._extract_mpd_formats(
                     source_url, video_id, mpd_id=mpd_id, fatal=False))
             elif ext == 'smil':
@@ -2833,20 +2848,23 @@ class InfoExtractor(object):
                     'ext': ext,
                 })
             else:
+                format_id = str_or_none(source.get('label'))
                 height = int_or_none(source.get('height'))
-                if height is None:
+                if height is None and format_id:
                     # Often no height is provided but there is a label in
                     # format like "1080p", "720p SD", or 1080.
-                    height = int_or_none(self._search_regex(
-                        r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
-                        'height', default=None))
+                    height = parse_resolution(format_id).get('height')
                 a_format = {
                     'url': source_url,
                     'width': int_or_none(source.get('width')),
                     'height': height,
                     'tbr': int_or_none(source.get('bitrate'), scale=1000),
+                    'filesize': int_or_none(source.get('filesize')),
                     'ext': ext,
                 }
+                if format_id:
+                    a_format['format_id'] = format_id
+
                 if source_url.startswith('rtmp'):
                     a_format['ext'] = 'flv'
                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as

From c2f9be3e63a000cf20e9e4ad789a4f5453d00eb7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 12 Nov 2022 11:55:05 +0000
Subject: [PATCH 150/312] [generic] Add KVS player extraction

---
 youtube_dl/extractor/generic.py | 183 ++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a9c064105..01e406750 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -28,6 +28,7 @@ from ..utils import (
     mimetype2ext,
     orderedSet,
     parse_duration,
+    parse_resolution,
     sanitized_Request,
     smuggle_url,
     unescapeHTML,
@@ -2227,6 +2228,97 @@ class GenericIE(InfoExtractor):
             # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
             'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
             'only_matching': True,
+        }, {
+            # KVS Player
+            'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
+            'info_dict': {
+                'id': '105',
+                'display_id': 'kelis-4th-of-july',
+                'ext': 'mp4',
+                'title': 'Kelis - 4th Of July',
+                'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+            },
+        }, {
+            # KVS Player
+            'url': 'https://www.kvs-demo.com/embed/105/',
+            'info_dict': {
+                'id': '105',
+                'display_id': 'kelis-4th-of-july',
+                'ext': 'mp4',
+                'title': 'Kelis - 4th Of July / Embed Player',
+                'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        }, {
+            # KVS Player
+            'url': 'https://thisvid.com/videos/fruit-is-healthy/',
+            'md5': 'f83e52f409b9139a7efee58ef926a72e',
+            'info_dict': {
+                'id': '7079579',
+                'display_id': 'fruit-is-healthy',
+                'ext': 'mp4',
+                'title': 'Fruit is healthy - ThisVid.com',
+                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg',
+            }
+        }, {
+            # KVS Player
+            'url': 'https://thisvid.com/embed/7079579/',
+            'info_dict': {
+                'id': '7079579',
+                'display_id': 'fruit-is-healthy',
+                'ext': 'mp4',
+                'title': 'Fruit is healthy - ThisVid.com',
+                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        }, {
+            # KVS Player
+            'url': 'https://youix.com/video/leningrad-zoj/',
+            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+            'info_dict': {
+                'id': '18485',
+                'display_id': 'leningrad-zoj',
+                'ext': 'mp4',
+                'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
+                'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+            },
+        }, {
+            # KVS Player
+            'url': 'https://youix.com/embed/18485',
+            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+            'info_dict': {
+                'id': '18485',
+                'display_id': 'leningrad-zoj',
+                'ext': 'mp4',
+                'title': 'Ленинград - ЗОЖ',
+                'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
+            },
+        }, {
+            # KVS Player
+            'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
+            'md5': '94166bdb26b4cb1fb9214319a629fc51',
+            'info_dict': {
+                'id': '21217',
+                'display_id': '40-nochey-2016',
+                'ext': 'mp4',
+                'title': '40 ночей (2016) - BogMedia.org',
+                'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
+            },
+        }, {
+            # KVS Player (for sites that serve kt_player.js via non-https urls)
+            'url': 'http://www.camhub.world/embed/389508',
+            'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
+            'info_dict': {
+                'id': '389508',
+                'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
+                'ext': 'mp4',
+                'title': 'Syren De Mer  onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
+                'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
+            },
         },
     ]
 
@@ -2332,6 +2424,87 @@ class GenericIE(InfoExtractor):
             'title': title,
         }
 
+    def _extract_kvs(self, url, webpage, video_id):
+
+        def getlicensetoken(license):
+            modlicense = license.replace('$', '').replace('0', '1')
+            center = int(len(modlicense) / 2)
+            fronthalf = int(modlicense[:center + 1])
+            backhalf = int(modlicense[center:])
+
+            modlicense = compat_str(4 * abs(fronthalf - backhalf))
+
+            def parts():
+                for o in range(0, center + 1):
+                    for i in range(1, 5):
+                        yield compat_str((int(license[o + i]) + int(modlicense[o])) % 10)
+
+            return ''.join(parts())
+
+        def getrealurl(video_url, license_code):
+            if not video_url.startswith('function/0/'):
+                return video_url  # not obfuscated
+
+            url_path, _, url_query = video_url.partition('?')
+            urlparts = url_path.split('/')[2:]
+            license = getlicensetoken(license_code)
+            newmagic = urlparts[5][:32]
+
+            def spells(x, o):
+                l = (o + sum(int(n) for n in license[o:])) % 32
+                for i in range(0, len(x)):
+                    yield {l: x[o], o: x[l]}.get(i, x[i])
+
+            for o in range(len(newmagic) - 1, -1, -1):
+                newmagic = ''.join(spells(newmagic, o))
+
+            urlparts[5] = newmagic + urlparts[5][32:]
+            return '/'.join(urlparts) + '?' + url_query
+
+        flashvars = self._search_regex(
+            r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>',
+            webpage, 'flashvars')
+        flashvars = self._parse_json(flashvars, video_id, transform_source=js_to_json)
+
+        # extract the part after the last / as the display_id from the
+        # canonical URL.
+        display_id = self._search_regex(
+            r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
+            r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
+            webpage, 'display_id', fatal=False
+        )
+        title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
+
+        thumbnail = flashvars['preview_url']
+        if thumbnail.startswith('//'):
+            protocol, _, _ = url.partition('/')
+            thumbnail = protocol + thumbnail
+
+        url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
+        formats = []
+        for key in url_keys:
+            if '/get_file/' not in flashvars[key]:
+                continue
+            format_id = flashvars.get(key + '_text', key)
+            formats.append(merge_dicts(
+                parse_resolution(format_id) or parse_resolution(flashvars[key]), {
+                    'url': getrealurl(flashvars[key], flashvars['license_code']),
+                    'format_id': format_id,
+                    'ext': 'mp4',
+                }))
+            if not formats[-1].get('height'):
+                formats[-1]['quality'] = 1
+
+        self._sort_formats(formats)
+
+        return {
+            'id': flashvars['video_id'],
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
     def _real_extract(self, url):
         if url.startswith('//'):
             return self.url_result(self.http_scheme() + url)
@@ -3389,6 +3562,16 @@ class GenericIE(InfoExtractor):
                 info_dict['formats'] = formats
                 return info_dict
 
+        # Look for generic KVS player (before ld+json for tests)
+        found = re.search(
+            r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)\1[^>]*>',
+            webpage)
+        if found:
+            self.report_extraction('KVS Player')
+            if found.group('maj_ver') not in ('4', '5', '6'):
+                self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), ))
+            return self._extract_kvs(url, webpage, video_id)
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')

From 1a4fbe8462f5e531a891aeac7db6c0bde49c5536 Mon Sep 17 00:00:00 2001
From: FraFraFra-LongD <85188920+FraFraFra-LongD@users.noreply.github.com>
Date: Sun, 13 Nov 2022 14:22:04 +0100
Subject: [PATCH 151/312] Added ThisVid.com support (#29187)

* add ThisVidIE, ThisVidMemberIE, ThisVidPlaylistIE
* redirect embed to main page for more metadata
* use KVS extraction newly added to GenericIE and remove duplicate tests
* also add MrDeepFake etc compat to GenericIE
(closes #22390)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |   5 +
 youtube_dl/extractor/generic.py    |  54 ++++---
 youtube_dl/extractor/thisvid.py    | 218 +++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 28 deletions(-)
 create mode 100644 youtube_dl/extractor/thisvid.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4d9f37424..947cbe8fd 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1265,6 +1265,11 @@ from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
+from .thisvid import (
+    ThisVidIE,
+    ThisVidMemberIE,
+    ThisVidPlaylistIE,
+)
 from .threeqsdn import ThreeQSDNIE
 from .tiktok import (
     TikTokIE,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 01e406750..597611157 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2252,31 +2252,7 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         }, {
-            # KVS Player
-            'url': 'https://thisvid.com/videos/fruit-is-healthy/',
-            'md5': 'f83e52f409b9139a7efee58ef926a72e',
-            'info_dict': {
-                'id': '7079579',
-                'display_id': 'fruit-is-healthy',
-                'ext': 'mp4',
-                'title': 'Fruit is healthy - ThisVid.com',
-                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg',
-            }
-        }, {
-            # KVS Player
-            'url': 'https://thisvid.com/embed/7079579/',
-            'info_dict': {
-                'id': '7079579',
-                'display_id': 'fruit-is-healthy',
-                'ext': 'mp4',
-                'title': 'Fruit is healthy - ThisVid.com',
-                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/7079000/7079579/preview.jpg',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        }, {
-            # KVS Player
+            # KVS Player (tested also in thisvid.py)
             'url': 'https://youix.com/video/leningrad-zoj/',
             'md5': '94f96ba95706dc3880812b27b7d8a2b8',
             'info_dict': {
@@ -2306,6 +2282,7 @@ class GenericIE(InfoExtractor):
                 'display_id': '40-nochey-2016',
                 'ext': 'mp4',
                 'title': '40 ночей (2016) - BogMedia.org',
+                'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
                 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
             },
         }, {
@@ -2319,6 +2296,18 @@ class GenericIE(InfoExtractor):
                 'title': 'Syren De Mer  onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
                 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
             },
+        }, {
+            'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes',
+            'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4',
+            'info_dict': {
+                'id': '5',
+                'display_id': 'selena-gomez-pov-deep-fakes',
+                'ext': 'mp4',
+                'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes',
+                'description': 'md5:17d1f84b578c9c26875ac5ef9a932354',
+                'height': 720,
+                'age_limit': 18,
+            },
         },
     ]
 
@@ -2491,6 +2480,7 @@ class GenericIE(InfoExtractor):
                     'url': getrealurl(flashvars[key], flashvars['license_code']),
                     'format_id': format_id,
                     'ext': 'mp4',
+                    'http_headers': {'Referer': url},
                 }))
             if not formats[-1].get('height'):
                 formats[-1]['quality'] = 1
@@ -2713,9 +2703,15 @@ class GenericIE(InfoExtractor):
         # but actually don't.
         AGE_LIMIT_MARKERS = [
             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+            r'>[^<]*you acknowledge you are at least (\d+) years old',
         ]
-        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
-            age_limit = 18
+        for marker in AGE_LIMIT_MARKERS:
+            m = re.search(marker, webpage)
+            if not m:
+                continue
+            age_limit = max(
+                age_limit or 0,
+                int_or_none(m.groups() and m.group(1), default=18))
 
         # video uploader is domain name
         video_uploader = self._search_regex(
@@ -3570,7 +3566,9 @@ class GenericIE(InfoExtractor):
             self.report_extraction('KVS Player')
             if found.group('maj_ver') not in ('4', '5', '6'):
                 self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), ))
-            return self._extract_kvs(url, webpage, video_id)
+            return merge_dicts(
+                self._extract_kvs(url, webpage, video_id),
+                info_dict)
 
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
diff --git a/youtube_dl/extractor/thisvid.py b/youtube_dl/extractor/thisvid.py
new file mode 100644
index 000000000..bc4bcb2d1
--- /dev/null
+++ b/youtube_dl/extractor/thisvid.py
@@ -0,0 +1,218 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urlparse,
+)
+from ..utils import (
+    clean_html,
+    get_element_by_class,
+    int_or_none,
+    merge_dicts,
+    url_or_none,
+    urljoin,
+)
+
+
+class ThisVidIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
+    _TESTS = [{
+        'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
+        'md5': '839becb572995687e11a69dc4358a386',
+        'info_dict': {
+            'id': '3533241',
+            'ext': 'mp4',
+            'title': 'Sitting on ball tight jeans',
+            'description': 'md5:372353bb995883d1b65fddf507489acd',
+            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
+            'uploader_id': '150629',
+            'uploader': 'jeanslevisjeans',
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://thisvid.com/embed/3533241/',
+        'md5': '839becb572995687e11a69dc4358a386',
+        'info_dict': {
+            'id': '3533241',
+            'ext': 'mp4',
+            'title': 'Sitting on ball tight jeans',
+            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
+            'uploader_id': '150629',
+            'uploader': 'jeanslevisjeans',
+            'age_limit': 18,
+        }
+    }]
+
+    def _real_extract(self, url):
+        main_id, type_ = re.match(self._VALID_URL, url).group('id', 'type')
+        webpage = self._download_webpage(url, main_id)
+
+        title = self._html_search_regex(
+            r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
+            webpage, 'title')
+
+        if type_ == 'embed':
+            # look for more metadata
+            video_alt_url = url_or_none(self._search_regex(
+                r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, ),
+                webpage, 'video_alt_url', default=None))
+            if video_alt_url and video_alt_url != url:
+                webpage = self._download_webpage(
+                    video_alt_url, main_id,
+                    note='Redirecting embed to main page', fatal=False) or webpage
+
+        video_holder = get_element_by_class('video-holder', webpage) or ''
+        if '>This video is a private video' in video_holder:
+            self.raise_login_required(
+                (clean_html(video_holder) or 'Private video').split('\n', 1)[0])
+
+        uploader = self._html_search_regex(
+            r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
+            webpage, 'uploader', default='')
+        uploader = re.split(r'''/["'][^>]*>\s*''', uploader)
+        if len(uploader) == 2:
+            # id must be non-empty, uploader could be ''
+            uploader_id, uploader = uploader
+            uploader = uploader or None
+        else:
+            uploader_id = uploader = None
+
+        return merge_dicts({
+            '_type': 'url_transparent',
+            'title': title,
+            'age_limit': 18,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+        }, self.url_result(url, ie='Generic'))
+
+
+class ThisVidMemberIE(InfoExtractor):
+    _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://thisvid.com/members/2140501/',
+        'info_dict': {
+            'id': '2140501',
+            'title': 'Rafflesia\'s Profile',
+        },
+        'playlist_mincount': 16,
+    }, {
+        'url': 'https://thisvid.com/members/2140501/favourite_videos/',
+        'info_dict': {
+            'id': '2140501',
+            'title': 'Rafflesia\'s Favourite Videos',
+        },
+        'playlist_mincount': 15,
+    }, {
+        'url': 'https://thisvid.com/members/636468/public_videos/',
+        'info_dict': {
+            'id': '636468',
+            'title': 'Happymouth\'s Public Videos',
+        },
+        'playlist_mincount': 196,
+    },
+    ]
+
+    def _urls(self, html):
+        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, ), html):
+            yield m.group('url')
+
+    def _real_extract(self, url):
+        pl_id = self._match_id(url)
+        webpage = self._download_webpage(url, pl_id)
+
+        title = re.split(
+            r'(?i)\s*\|\s*ThisVid\.com\s*$',
+            self._og_search_title(webpage, default=None) or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
+
+        def entries(page_url, html=None):
+            for page in itertools.count(1):
+                if not html:
+                    html = self._download_webpage(
+                        page_url, pl_id, note='Downloading page %d' % (page, ),
+                        fatal=False) or ''
+                for u in self._urls(html):
+                    yield u
+                next_page = get_element_by_class('pagination-next', html) or ''
+                if next_page:
+                    # member list page
+                    next_page = urljoin(url, self._search_regex(
+                        r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
+                        next_page, 'next page link', group='url', default=None))
+                # in case a member page should have pagination-next with empty link, not just `else:`
+                if next_page is None:
+                    # playlist page
+                    parsed_url = compat_urlparse.urlparse(page_url)
+                    base_path, num = parsed_url.path.rsplit('/', 1)
+                    num = int_or_none(num)
+                    if num is None:
+                        base_path, num = parsed_url.path.rstrip('/'), 1
+                    parsed_url = parsed_url._replace(path=base_path + ('/%d' % (num + 1, )))
+                    next_page = compat_urlparse.urlunparse(parsed_url)
+                    if page_url == next_page:
+                        next_page = None
+                if not next_page:
+                    break
+                page_url, html = next_page, None
+
+        return self.playlist_from_matches(
+            entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')
+
+
+class ThisVidPlaylistIE(ThisVidMemberIE):
+    _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
+    _TESTS = [{
+        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
+        'info_dict': {
+            'id': '6615',
+            'title': 'Underwear Stuff',
+        },
+        'playlist_mincount': 200,
+    }, {
+        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
+        'info_dict': {
+            'id': '1072387',
+            'ext': 'mp4',
+            'title': 'Big Italian Booty 28',
+            'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
+            'uploader_id': '367912',
+            'uploader': 'Jcmusclefun',
+            'age_limit': 18,
+        },
+        'params': {
+            'noplaylist': True,
+        },
+    }]
+
+    def _get_video_url(self, pl_url):
+        video_id = re.match(self._VALID_URL, pl_url).group('video_id')
+        return urljoin(pl_url, '/videos/%s/' % (video_id, ))
+
+    def _urls(self, html):
+        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, ), html):
+            yield self._get_video_url(m.group('url'))
+
+    def _real_extract(self, url):
+        pl_id = self._match_id(url)
+
+        if self._downloader.params.get('noplaylist'):
+            self.to_screen('Downloading just the featured video because of --no-playlist')
+            return self.url_result(self._get_video_url(url), 'ThisVid')
+
+        self.to_screen(
+            'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
+        result = super(ThisVidPlaylistIE, self)._real_extract(url)
+
+        # rework title returned as `the title - the title`
+        title = result['title']
+        t_len = len(title)
+        if t_len > 5 and t_len % 2 != 0:
+            t_len = t_len // 2
+            if title[t_len] == '-':
+                title = [t.strip() for t in (title[:t_len], title[t_len + 1:])]
+                if title[0] and title[0] == title[1]:
+                    result['title'] = title[0]
+        return result

From fc2beab0e701c497a003f11fef5c0df54fba1da3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 13 Nov 2022 14:59:30 +0000
Subject: [PATCH 152/312] [generic] Improve KVS (etc) extraction

* detect kt_player('kt_player', 'https://.../kt_player.swf?v=5...
* detect age limit if 18 USC 2257 is mentioned
* test with shooshtime.com

Partially resolves #31332.
---
 youtube_dl/extractor/generic.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 597611157..3e8281ed3 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -36,6 +36,7 @@ from ..utils import (
     unsmuggle_url,
     UnsupportedError,
     url_or_none,
+    urljoin,
     xpath_attr,
     xpath_text,
     xpath_with_ns,
@@ -2308,6 +2309,17 @@ class GenericIE(InfoExtractor):
                 'height': 720,
                 'age_limit': 18,
             },
+        }, {
+            'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
+            'md5': 'e2f0a4c329f7986280b7328e24036d60',
+            'info_dict': {
+                'id': '284002',
+                'display_id': 'just-out-of-the-shower-joi',
+                'ext': 'mp4',
+                'title': 'Just Out Of The Shower JOI - Shooshtime',
+                'height': 720,
+                'age_limit': 18,
+            },
         },
     ]
 
@@ -2477,7 +2489,7 @@ class GenericIE(InfoExtractor):
             format_id = flashvars.get(key + '_text', key)
             formats.append(merge_dicts(
                 parse_resolution(format_id) or parse_resolution(flashvars[key]), {
-                    'url': getrealurl(flashvars[key], flashvars['license_code']),
+                    'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
                     'format_id': format_id,
                     'ext': 'mp4',
                     'http_headers': {'Referer': url},
@@ -2704,6 +2716,7 @@ class GenericIE(InfoExtractor):
         AGE_LIMIT_MARKERS = [
             r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
             r'>[^<]*you acknowledge you are at least (\d+) years old',
+            r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
         ]
         for marker in AGE_LIMIT_MARKERS:
             m = re.search(marker, webpage)
@@ -3559,13 +3572,15 @@ class GenericIE(InfoExtractor):
                 return info_dict
 
         # Look for generic KVS player (before ld+json for tests)
-        found = re.search(
-            r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)\1[^>]*>',
-            webpage)
+        found = self._search_regex(
+            (r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
+             # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
+             r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,', 
+            ), webpage, 'KVS player', group='ver', default=False)
         if found:
-            self.report_extraction('KVS Player')
-            if found.group('maj_ver') not in ('4', '5', '6'):
-                self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), ))
+            self.report_extraction('%s: KVS Player' % (video_id, ))
+            if found.split('.')[0] not in ('4', '5', '6'):
+                self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
             return merge_dicts(
                 self._extract_kvs(url, webpage, video_id),
                 info_dict)

From 195f22f679330549882a8234e7234942893a4902 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 13 Nov 2022 15:09:29 +0000
Subject: [PATCH 153/312] [generic] Improve KVS (etc) extraction

---
 youtube_dl/extractor/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 3e8281ed3..0e473e952 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -3575,8 +3575,8 @@ class GenericIE(InfoExtractor):
         found = self._search_regex(
             (r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
              # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
-             r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,', 
-            ), webpage, 'KVS player', group='ver', default=False)
+             r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
+             ), webpage, 'KVS player', group='ver', default=False)
         if found:
             self.report_extraction('%s: KVS Player' % (video_id, ))
             if found.split('.')[0] not in ('4', '5', '6'):

From 14ef89a8dab4f6ba6185d6f5bf0317a705d7b842 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 1 Feb 2023 09:39:49 +0530
Subject: [PATCH 154/312] Support `if` statements

Fix for yt-dlp/yt_dlp#6131
Closes #31509
---
 test/test_jsinterp.py          | 32 ++++++++++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 ++++
 youtube_dl/jsinterp.py         | 21 ++++++++++++++++++---
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 5121c8cf8..c47def737 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -158,6 +158,38 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('z'), 5)
         self.assertEqual(jsi.call_function('y'), 2)
 
+    def test_if(self):
+        jsi = JSInterpreter('''
+        function x() {
+            let a = 9;
+            if (0==0) {a++}
+            return a
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0==0) {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
     def test_for_loop(self):
         # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4e678cae0..ac37ffa45 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -135,6 +135,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
         'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
     ),
+    (
+        'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+        'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 530a705b4..9a3b8d7f2 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -214,7 +214,7 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
+                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     @classmethod
@@ -268,7 +268,7 @@ class JSInterpreter(object):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -301,7 +301,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -428,10 +428,25 @@ class JSInterpreter(object):
 
         m = re.match(r'''(?x)
                 (?P<try>try)\s*\{|
+                (?P<if>if)\s*\(|
                 (?P<switch>switch)\s*\(|
                 (?P<for>for)\s*\(
                 ''', expr)
         md = m.groupdict() if m else {}
+        if md.get('if'):
+            cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+            if_expr, expr = self._separate_at_paren(expr.lstrip())
+            # TODO: "else if" is not handled
+            else_expr = None
+            m = re.match(r'else\s*{', expr)
+            if m:
+                else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+            cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+            ret, should_abort = self.interpret_statement(
+                if_expr if cndn else else_expr, local_vars, allow_recursion)
+            if should_abort:
+                return ret, True
+
         if md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None

From 295736c9cba714fb5de7d1c3dd31d86e50091cf8 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 14:28:32 +0000
Subject: [PATCH 155/312] [jsinterp] Improve parsing * support subset `... else
 if ...` * support `while` * add `RegExp` class * generalise `new` support *
 limited more debug strings * matching test changes

---
 test/test_jsinterp.py  |  53 +++++++++++++-
 youtube_dl/jsinterp.py | 156 +++++++++++++++++++++++++++--------------
 2 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c47def737..b5962356c 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.compat import compat_re_Pattern
-
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
 
 
@@ -140,15 +138,23 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
+    def test_Date(self):
         jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
         self.assertEqual(jsi.call_function('x'), 86000)
+
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
+        # date format m/d/y
+        jsi = JSInterpreter('''
+        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
@@ -181,6 +187,15 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('x'), 10)
 
         """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
+    def test_elseif(self):
         jsi = JSInterpreter('''
         function x() {
             if (0!=0) {return 1}
@@ -188,6 +203,16 @@ class TestJSInterpreter(unittest.TestCase):
             else {return 10}
         }''')
         self.assertEqual(jsi.call_function('x'), 10)
+
+        """  # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        # etc
         """
 
     def test_for_loop(self):
@@ -197,6 +222,13 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
+    def test_while_loop(self):
+        # function x() { a=0; while (a<10) {a++} a }
+        jsi = JSInterpreter('''
+        function x() { a=0; while (a<10) {a++} return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
     def test_switch(self):
         jsi = JSInterpreter('''
         function x(f) { switch(f){
@@ -415,13 +447,28 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; return a; }
         ''')
-        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
+        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
+                     'groups', 'match', 'pattern', 'scanner',
+                     'search', 'split', 'sub', 'subn'))
+        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a=[/[)\\]/]; return a[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
+
+        """  # fails
+        jsi = JSInterpreter(r'''
+        function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+        """
+
     def test_char_code_at(self):
         jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
         self.assertEqual(jsi.call_function('x', 0), 116)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 9a3b8d7f2..1e7b342ac 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -187,19 +187,6 @@ class LocalNameSpace(ChainMap):
 class JSInterpreter(object):
     __named_object_counter = 0
 
-    _RE_FLAGS = {
-        # special knowledge: Python's re flags are bitmask values, current max 128
-        # invent new bitmask values well above that for literal parsing
-        # TODO: new pattern class to execute matches with these flags
-        'd': 1024,  # Generate indices for substring matches
-        'g': 2048,  # Global search
-        'i': re.I,  # Case-insensitive search
-        'm': re.M,  # Multi-line search
-        's': re.S,  # Allows . to match newline characters
-        'u': re.U,  # Treat a pattern as a sequence of unicode code points
-        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
-    }
-
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -217,9 +204,48 @@ class JSInterpreter(object):
                 msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
+    class JS_RegExp(object):
+        _RE_FLAGS = {
+            # special knowledge: Python's re flags are bitmask values, current max 128
+            # invent new bitmask values well above that for literal parsing
+            # TODO: new pattern class to execute matches with these flags
+            'd': 1024,  # Generate indices for substring matches
+            'g': 2048,  # Global search
+            'i': re.I,  # Case-insensitive search
+            'm': re.M,  # Multi-line search
+            's': re.S,  # Allows . to match newline characters
+            'u': re.U,  # Treat a pattern as a sequence of unicode code points
+            'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
+        }
+
+        def __init__(self, pattern_txt, flags=''):
+            if isinstance(flags, compat_str):
+                flags, _ = self.regex_flags(flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
+            # First, avoid https://github.com/python/cpython/issues/74534
+            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            for name in dir(self.__self):
+                # Only these? Obviously __class__, __init__.
+                # PyPy creates a __weakref__ attribute with value None
+                # that can't be setattr'd but also can't need to be copied.
+                if name in ('__class__', '__init__', '__weakref__'):
+                    continue
+                setattr(self, name, getattr(self.__self, name))
+
+        @classmethod
+        def regex_flags(cls, expr):
+            flags = 0
+            if not expr:
+                return flags, expr
+            for idx, ch in enumerate(expr):
+                if ch not in cls._RE_FLAGS:
+                    break
+                flags |= cls._RE_FLAGS[ch]
+            return flags, expr[idx + 1:]
+
     @classmethod
     def __op_chars(cls):
-        op_chars = set(';,')
+        op_chars = set(';,[')
         for op in cls._all_operators():
             for c in op[0]:
                 op_chars.add(c)
@@ -231,17 +257,6 @@ class JSInterpreter(object):
         namespace[name] = obj
         return name
 
-    @classmethod
-    def _regex_flags(cls, expr):
-        flags = 0
-        if not expr:
-            return flags, expr
-        for idx, ch in enumerate(expr):
-            if ch not in cls._RE_FLAGS:
-                break
-            flags |= cls._RE_FLAGS[ch]
-        return flags, expr[idx + 1:]
-
     @classmethod
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
@@ -328,7 +343,7 @@ class JSInterpreter(object):
         try:
             return opfunc(left_val, right_val)
         except Exception as e:
-            raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
+            raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
 
     def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
@@ -338,7 +353,7 @@ class JSInterpreter(object):
         except Exception as e:
             if allow_undefined:
                 return JS_Undefined
-            raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
         try:
@@ -352,6 +367,7 @@ class JSInterpreter(object):
         allow_recursion -= 1
 
         should_return = False
+        # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
         expr = stmt = sub_statements.pop().strip()
         for sub_stmt in sub_statements:
@@ -371,25 +387,30 @@ class JSInterpreter(object):
         if expr[0] in _QUOTES:
             inner, outer = self._separate(expr, expr[0], 1)
             if expr[0] == '/':
-                flags, outer = self._regex_flags(outer)
-                inner = re.compile(inner[1:], flags=flags)  # , strict=True))
+                flags, outer = self.JS_RegExp.regex_flags(outer)
+                inner = self.JS_RegExp(inner[1:], flags=flags)
             else:
                 inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
             if not outer:
                 return inner, should_return
             expr = self._named_object(local_vars, inner) + outer
 
-        if expr.startswith('new '):
-            obj = expr[4:]
-            if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:])
-                expr = unified_timestamp(
-                    self.interpret_expression(left, local_vars, allow_recursion), False)
+        new_kw, _, obj = expr.partition('new ')
+        if not new_kw:
+            for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+                                  ('RegExp', self.JS_RegExp),
+                                  ('Error', self.Exception)):
+                if not obj.startswith(klass + '('):
+                    continue
+                left, right = self._separate_at_paren(obj[len(klass):])
+                argvals = self.interpret_iter(left, local_vars, allow_recursion)
+                expr = konstr(*argvals)
                 if not expr:
-                    raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
-                expr = self._dump(int(expr * 1000), local_vars) + right
+                    raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
+                expr = self._dump(expr, local_vars) + right
+                break
             else:
-                raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
+                raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
 
         if expr.startswith('void '):
             left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
@@ -430,24 +451,45 @@ class JSInterpreter(object):
                 (?P<try>try)\s*\{|
                 (?P<if>if)\s*\(|
                 (?P<switch>switch)\s*\(|
-                (?P<for>for)\s*\(
+                (?P<for>for)\s*\(|
+                (?P<while>while)\s*\(
                 ''', expr)
         md = m.groupdict() if m else {}
         if md.get('if'):
             cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
-            if_expr, expr = self._separate_at_paren(expr.lstrip())
-            # TODO: "else if" is not handled
+            if expr.startswith('{'):
+                if_expr, expr = self._separate_at_paren(expr)
+            else:
+                # may lose ... else ... because of ll.368-374
+                if_expr, expr = self._separate_at_paren(expr, delim=';')
             else_expr = None
-            m = re.match(r'else\s*{', expr)
+            m = re.match(r'else\s*(?P<block>\{)?', expr)
             if m:
-                else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                if m.group('block'):
+                    else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                else:
+                    # handle subset ... else if (...) {...} else ...
+                    # TODO: make interpret_statement do this properly, if possible
+                    exprs = list(self._separate(expr[m.end():], delim='}', max_split=2))
+                    if len(exprs) > 1:
+                        if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]):
+                            else_expr = exprs[0] + '}' + exprs[1]
+                            expr = (exprs[2] + '}') if len(exprs) == 3 else None
+                        else:
+                            else_expr = exprs[0]
+                            exprs.append('')
+                            expr = '}'.join(exprs[1:])
+                    else:
+                        else_expr = exprs[0]
+                        expr = None
+                    else_expr = else_expr.lstrip() + '}'
             cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
             ret, should_abort = self.interpret_statement(
                 if_expr if cndn else else_expr, local_vars, allow_recursion)
             if should_abort:
                 return ret, True
 
-        if md.get('try'):
+        elif md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None
             try:
@@ -484,8 +526,8 @@ class JSInterpreter(object):
             if err:
                 raise err
 
-        elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
+        elif md.get('for') or md.get('while'):
+            init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
@@ -496,11 +538,12 @@ class JSInterpreter(object):
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
                     body, expr = remaining, ''
-            start, cndn, increment = self._separate(constructor, ';')
-            self.interpret_expression(start, local_vars, allow_recursion)
-            while True:
-                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
-                    break
+            if md.get('for'):
+                start, cndn, increment = self._separate(init_or_cond, ';')
+                self.interpret_expression(start, local_vars, allow_recursion)
+            else:
+                cndn, increment = init_or_cond, None
+            while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                 try:
                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                     if should_abort:
@@ -509,7 +552,8 @@ class JSInterpreter(object):
                     break
                 except JS_Continue:
                     pass
-                self.interpret_expression(increment, local_vars, allow_recursion)
+                if increment:
+                    self.interpret_expression(increment, local_vars, allow_recursion)
 
         elif md.get('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
@@ -764,6 +808,10 @@ class JSInterpreter(object):
                     if idx >= len(obj):
                         return None
                     return ord(obj[idx])
+                elif member == 'replace':
+                    assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                    assertion(len(argvals) == 2, 'takes exactly two arguments')
+                    return re.sub(argvals[0], argvals[1], obj)
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -795,6 +843,10 @@ class JSInterpreter(object):
             raise self.Exception('Cannot return from an expression', expr)
         return ret
 
+    def interpret_iter(self, list_txt, local_vars, allow_recursion):
+        for v in self._separate(list_txt):
+            yield self.interpret_expression(v, local_vars, allow_recursion)
+
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}

From 37cbdfa0e7c9d00d450af32dc9cdaf93cbfc4576 Mon Sep 17 00:00:00 2001
From: Brian Marks <bm1549@users.noreply.github.com>
Date: Thu, 2 Feb 2023 11:58:21 -0500
Subject: [PATCH 156/312] [americastestkitchen] Add support for downloading
 entire series (#31493)

Also
* support new sites and URL patterns
* back-port from yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/americastestkitchen.py | 115 +++++++++++++++-----
 1 file changed, 88 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
index be960c0f9..08d3604e9 100644
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5b400b9ee338f922cb06450c',
             'title': 'Japanese Suppers',
             'ext': 'mp4',
+            'display_id': 'weeknight-japanese-suppers',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1523318400,
-            'upload_date': '20180410',
-            'release_date': '20180410',
+            'timestamp': 1523304000,
+            'upload_date': '20180409',
+            'release_date': '20180409',
             'series': "America's Test Kitchen",
+            'season': 'Season 18',
             'season_number': 18,
             'episode': 'Japanese Suppers',
             'episode_number': 15,
+            'duration': 1376,
+            'thumbnail': r're:^https?://',
+            'average_rating': 0,
+            'view_count': int,
         },
         'params': {
             'skip_download': True,
@@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5fbe8c61bda2010001c6763b',
             'title': 'Simple Chicken Dinner',
             'ext': 'mp4',
+            'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
             'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1610755200,
-            'upload_date': '20210116',
-            'release_date': '20210116',
+            'timestamp': 1610737200,
+            'upload_date': '20210115',
+            'release_date': '20210115',
             'series': "America's Test Kitchen",
+            'season': 'Season 21',
             'season_number': 21,
             'episode': 'Simple Chicken Dinner',
             'episode_number': 3,
+            'duration': 1397,
+            'thumbnail': r're:^https?://',
+            'view_count': int,
+            'average_rating': 0,
         },
         'params': {
             'skip_download': True,
@@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+        'only_matching': True,
     }, {
         'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
         'only_matching': True,
@@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
 
 
 class AmericasTestKitchenSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
     _TESTS = [{
         # ATK Season
         'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
         'playlist_count': 13,
     }, {
         # Cooks Country Season
-        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
         'info_dict': {
             'id': 'season_12',
             'title': 'Season 12',
         },
         'playlist_count': 13,
+    }, {
+        # America's Test Kitchen Series
+        'url': 'https://www.americastestkitchen.com/',
+        'info_dict': {
+            'id': 'americastestkitchen',
+            'title': 'America\'s Test Kitchen',
+        },
+        'playlist_count': 558,
+    }, {
+        # Cooks Country Series
+        'url': 'https://www.americastestkitchen.com/cookscountry',
+        'info_dict': {
+            'id': 'cookscountry',
+            'title': 'Cook\'s Country',
+        },
+        'playlist_count': 199,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        show_name, season_number = re.match(self._VALID_URL, url).groups()
-        season_number = int(season_number)
+        match = re.match(self._VALID_URL, url).groupdict()
+        show = match.get('show2')
+        show_path = ('/' + show) if show else ''
+        show = show or match['show']
+        season_number = int_or_none(match.get('season'))
 
-        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+        slug, title = {
+            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+            'cookscountry': ('cco', 'Cook\'s Country'),
+            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+        }[show]
 
-        season = 'Season %d' % season_number
+        facet_filters = [
+            'search_document_klass:episode',
+            'search_show_slug:' + slug,
+        ]
+
+        if season_number:
+            playlist_id = 'season_%d' % season_number
+            playlist_title = 'Season %d' % season_number
+            facet_filters.append('search_season_list:' + playlist_title)
+        else:
+            playlist_id = show
+            playlist_title = title
 
         season_search = self._download_json(
             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
-            season, headers={
-                'Origin': 'https://www.%s.com' % show_name,
+            playlist_id, headers={
+                'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
-                'facetFilters': json.dumps([
-                    'search_season_list:' + season,
-                    'search_document_klass:episode',
-                    'search_show_slug:' + slug,
-                ]),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'facetFilters': json.dumps(facet_filters),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                 'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
 
         def entries():
             for episode in (season_search.get('hits') or []):
-                search_url = episode.get('search_url')
+                search_url = episode.get('search_url')  # always formatted like '/episode/123-title-of-episode'
                 if not search_url:
                     continue
                 yield {
                     '_type': 'url',
-                    'url': 'https://www.%s.com%s' % (show_name, search_url),
-                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
                     'title': episode.get('title'),
                     'description': episode.get('description'),
                     'timestamp': unified_timestamp(episode.get('search_document_date')),
@@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
                 }
 
         return self.playlist_result(
-            entries(), 'season_%d' % season_number, season)
+            entries(), playlist_id, playlist_title)

From 297fbff23b347612a5f6002b40adba9dfad85413 Mon Sep 17 00:00:00 2001
From: Rodrigo Dias <roycocup@users.noreply.github.com>
Date: Thu, 2 Feb 2023 17:10:09 +0000
Subject: [PATCH 157/312] [doc] Fixed typo appearing to promise an example
 (#31489)

Resolves #31425

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cd888c731..6e07ddb1c 100644
--- a/README.md
+++ b/README.md
@@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
 
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
 #### Output template and Windows batch files
 

From 807e593a32a1ace8fa0be8129fc5071d86516c99 Mon Sep 17 00:00:00 2001
From: JChris246 <43832407+JChris246@users.noreply.github.com>
Date: Thu, 2 Feb 2023 13:12:36 -0400
Subject: [PATCH 158/312] [cammodels] fix and improve extractor (#31453)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/cammodels.py | 34 +++++++++----------------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py
index 1eb81b75e..d2e860b24 100644
--- a/youtube_dl/extractor/cammodels.py
+++ b/youtube_dl/extractor/cammodels.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
     url_or_none,
 )
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            url, user_id, headers=self.geo_verification_headers())
-
-        manifest_root = self._html_search_regex(
-            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
-        if not manifest_root:
-            ERRORS = (
-                ("I'm offline, but let's stay connected", 'This user is currently offline'),
-                ('in a private show', 'This user is in a private show'),
-                ('is currently performing LIVE', 'This model is currently performing live'),
-            )
-            for pattern, message in ERRORS:
-                if pattern in webpage:
-                    error = message
-                    expected = True
-                    break
-            else:
-                error = 'Unable to find manifest URL root'
-                expected = False
-            raise ExtractorError(error, expected=expected)
-
         manifest = self._download_json(
-            '%s%s.json' % (manifest_root, user_id), user_id)
+            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
 
         formats = []
+        thumbnails = []
         for format_id, format_dict in manifest['formats'].items():
             if not isinstance(format_dict, dict):
                 continue
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
                         'preference': -1,
                     })
                 else:
+                    if format_id == 'jpeg':
+                        thumbnails.append({
+                            'url': f['url'],
+                            'width': f['width'],
+                            'height': f['height'],
+                            'format_id': f['format_id'],
+                        })
                     continue
                 formats.append(f)
         self._sort_formats(formats)
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
         return {
             'id': user_id,
             'title': self._live_title(user_id),
+            'thumbnails': thumbnails,
             'is_live': True,
             'formats': formats,
             'age_limit': 18

From e9611a2a3603ee201d0c1ba99e8bfd8ec1e697cd Mon Sep 17 00:00:00 2001
From: Leon Etienne <40911701+Leonetienne@users.noreply.github.com>
Date: Thu, 2 Feb 2023 18:13:39 +0100
Subject: [PATCH 159/312] [pr0gramm] implement InfoExtractor, Resolves #31433
 (#31434)

* [pr0gramm] implement infoextractor

* [pr0gramm] remove misplaced comment, uncapture regex-group

* [pr0gramm]: specify utf-8 coding

* [pr0gramm]: add trailing comma to lists for maintainability

* [pr0gramm]: ie only sets upload_date attribute

* [pr0gramm]: add video_id to title

* [pr0gramm]: more forgiving _valid_url regex

* [pr0gramm]: add uploader to title, if set

* Discriminate URL pattern

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |   4 ++
 youtube_dl/extractor/pr0gramm.py   | 105 +++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 youtube_dl/extractor/pr0gramm.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 947cbe8fd..cf0388ed2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1667,3 +1667,7 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)
diff --git a/youtube_dl/extractor/pr0gramm.py b/youtube_dl/extractor/pr0gramm.py
new file mode 100644
index 000000000..b68224fd5
--- /dev/null
+++ b/youtube_dl/extractor/pr0gramm.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+import re
+from ..utils import (
+    merge_dicts,
+)
+
+
+class Pr0grammStaticIE(InfoExtractor):
+    # Possible urls:
+    # https://pr0gramm.com/static/5466437
+    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://pr0gramm.com/static/5466437',
+        'md5': '52fa540d70d3edc286846f8ca85938aa',
+        'info_dict': {
+            'id': '5466437',
+            'ext': 'mp4',
+            'title': 'pr0gramm-5466437 by g11st',
+            'uploader': 'g11st',
+            'upload_date': '20221221',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Fetch media sources
+        entries = self._parse_html5_media_entries(url, webpage, video_id)
+        media_info = entries[0]
+
+        # this raises if there are no formats
+        self._sort_formats(media_info.get('formats') or [])
+
+        # Fetch author
+        uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
+
+        # Fetch approx upload timestamp from filename
+        # Have None-defaults in case the extraction fails
+        uploadDay = None
+        uploadMon = None
+        uploadYear = None
+        uploadTimestr = None
+        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
+        m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
+
+        if (m):
+            # Up to a day of accuracy should suffice...
+            uploadDay = m.groupdict().get('day')
+            uploadMon = m.groupdict().get('mon')
+            uploadYear = m.groupdict().get('year')
+            uploadTimestr = uploadYear + uploadMon + uploadDay
+
+        return merge_dicts({
+            'id': video_id,
+            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
+            'uploader': uploader,
+            'upload_date': uploadTimestr
+        }, media_info)
+
+
+# This extractor is for the primary url (used for sharing, and appears in the
+# location bar) Since this page loads the DOM via JS, yt-dl can't find any
+# video information here. So let's redirect to a compatibility version of
+# the site, which does contain the <video>-element  by itself,  without requiring
+# js to be ran.
+class Pr0grammIE(InfoExtractor):
+    # Possible urls:
+    # https://pr0gramm.com/new/546637
+    # https://pr0gramm.com/new/video/546637
+    # https://pr0gramm.com/top/546637
+    # https://pr0gramm.com/top/video/546637
+    # https://pr0gramm.com/user/g11st/uploads/5466437
+    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
+    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
+    # https://pr0gramm.com/user/froschler/1elf/5232030
+    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
+    # https://pr0gramm.com/top/fruher war alles damals/5498175
+
+    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
+    _TEST = {
+        'url': 'https://pr0gramm.com/new/video/5466437',
+        'info_dict': {
+            'id': '5466437',
+            'ext': 'mp4',
+            'title': 'pr0gramm-5466437 by g11st',
+            'uploader': 'g11st',
+            'upload_date': '20221221',
+        }
+    }
+
+    def _generic_title():
+        return "oof"
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        return self.url_result(
+            'https://pr0gramm.com/static/' + video_id,
+            video_id=video_id,
+            ie=Pr0grammStaticIE.ie_key())

From 98b0cf1cd05c493eae0f37aaa599d25d2848c0b0 Mon Sep 17 00:00:00 2001
From: Ruowang Sun <91006887+JohnnySunUmich@users.noreply.github.com>
Date: Thu, 2 Feb 2023 12:21:05 -0500
Subject: [PATCH 160/312] [Callin] Add new extractor (#31414)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/callin.py     | 74 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 75 insertions(+)
 create mode 100644 youtube_dl/extractor/callin.py

diff --git a/youtube_dl/extractor/callin.py b/youtube_dl/extractor/callin.py
new file mode 100644
index 000000000..341be479f
--- /dev/null
+++ b/youtube_dl/extractor/callin.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    try_get,
+)
+
+
+class CallinIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
+    _TESTS = [{
+        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+        'md5': '14ede27ee2c957b7e4db93140fc0745c',
+        'info_dict': {
+            'id': 'PrumRdSQJW',
+            'ext': 'mp4',
+            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+            'description': 'Or, why the government doesn’t like SpaceX',
+            'channel': 'The Pull Request',
+            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
+        }
+    }, {
+        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
+        'md5': '16f704ddbf82a27e3930533b12062f07',
+        'info_dict': {
+            'id': 'lzxMidUnjA',
+            'ext': 'mp4',
+            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+            'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
+            'channel': 'The DEBRIEF With Briahna Joy Gray',
+            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
+        }
+    }]
+
+    def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', fatal=fatal, **kw),
+            video_id, transform_source=transform_source, fatal=fatal)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        next_data = self._search_nextjs_data(webpage, video_id)
+        episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
+        if not episode:
+            raise ExtractorError('Failed to find episode data')
+
+        title = episode.get('title') or self._og_search_title(webpage)
+        description = episode.get('description') or self._og_search_description(webpage)
+
+        formats = []
+        formats.extend(self._extract_m3u8_formats(
+            episode.get('m3u8'), video_id, 'mp4',
+            entry_protocol='m3u8_native', fatal=False))
+        self._sort_formats(formats)
+
+        channel = try_get(episode, lambda x: x['show']['title'], compat_str)
+        channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+            'channel': channel,
+            'channel_url': channel_url,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cf0388ed2..f7bb4042f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -158,6 +158,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE

From 6d829d811932b24be4d3cc8b6c1e0d46c2b1566c Mon Sep 17 00:00:00 2001
From: zhangeric-15 <71106422+zhangeric-15@users.noreply.github.com>
Date: Thu, 2 Feb 2023 12:26:31 -0500
Subject: [PATCH 161/312] [YouTube] Fix not finding videos listed under a
 channel's "shorts" subpage.  (#31409)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves #31336

Co-authored-by: Jouni Järvinen <rautamiekka@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/youtube.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3d12e2e4a..28fdb086a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -315,7 +315,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         title = try_get(
             renderer,
             (lambda x: x['title']['runs'][0]['text'],
-             lambda x: x['title']['simpleText']), compat_str)
+             lambda x: x['title']['simpleText'],
+             lambda x: x['headline']['simpleText']), compat_str)
         description = try_get(
             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
             compat_str)
@@ -2207,6 +2208,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
+        # Shorts
+        'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'description': 'Short clips from Super Cooper Sundays!',
+            'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
+            'title': 'Super Cooper Shorts - Shorts',
+        }
+    }, {
+        # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
+        'url': 'https://www.youtube.com/@emergencyawesome/shorts',
+        'info_dict': {
+            'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
+            'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
+            'title': 'Emergency Awesome - Home',
+        },
+        'playlist_mincount': 5,
+    }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
@@ -2680,7 +2699,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+            video_renderer = try_get(
+                content,
+                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
+                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
+                dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:

From be3392a0d491af81f353b4372d47d589fda54b0c Mon Sep 17 00:00:00 2001
From: Epsilonator <28658223+clueless-skywatcher@users.noreply.github.com>
Date: Thu, 2 Feb 2023 23:03:09 +0530
Subject: [PATCH 162/312] [Blerp] Add new extractor (#31398)

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/blerp.py      | 173 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 2 files changed, 174 insertions(+)
 create mode 100644 youtube_dl/extractor/blerp.py

diff --git a/youtube_dl/extractor/blerp.py b/youtube_dl/extractor/blerp.py
new file mode 100644
index 000000000..355daef6e
--- /dev/null
+++ b/youtube_dl/extractor/blerp.py
@@ -0,0 +1,173 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from ..utils import (
+    strip_or_none,
+    traverse_obj,
+)
+from .common import InfoExtractor
+
+
+class BlerpIE(InfoExtractor):
+    IE_NAME = 'blerp'
+    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
+        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
+        'info_dict': {
+            'id': '6320fe8745636cb4dd677a5a',
+            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
+            'uploader': 'luminousaj',
+            'uploader_id': '5fb81e51aa66ae000c395478',
+            'ext': 'mp3',
+            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
+        }
+    }, {
+        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
+        'info_dict': {
+            'id': '5bc94ef4796001000498429f',
+            'title': 'Yee',
+            'uploader': '179617322678353920',
+            'uploader_id': '5ba99cf71386730004552c42',
+            'ext': 'mp3',
+            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
+        }
+    }]
+
+    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
+    _GRAPHQL_QUERY = (
+        '''query webBitePageGetBite($_id: MongoID!) {
+            web {
+                biteById(_id: $_id) {
+                    ...bitePageFrag
+                    __typename
+                }
+                __typename
+            }
+        }
+
+        fragment bitePageFrag on Bite {
+            _id
+            title
+            userKeywords
+            keywords
+            color
+            visibility
+            isPremium
+            owned
+            price
+            extraReview
+            isAudioExists
+            image {
+                filename
+                original {
+                    url
+                    __typename
+                }
+                __typename
+            }
+            userReactions {
+                _id
+                reactions
+                createdAt
+                __typename
+            }
+            topReactions
+            totalSaveCount
+            saved
+            blerpLibraryType
+            license
+            licenseMetaData
+            playCount
+            totalShareCount
+            totalFavoriteCount
+            totalAddedToBoardCount
+            userCategory
+            userAudioQuality
+            audioCreationState
+            transcription
+            userTranscription
+            description
+            createdAt
+            updatedAt
+            author
+            listingType
+            ownerObject {
+                _id
+                username
+                profileImage {
+                    filename
+                    original {
+                        url
+                        __typename
+                    }
+                    __typename
+                }
+                __typename
+            }
+            transcription
+            favorited
+            visibility
+            isCurated
+            sourceUrl
+            audienceRating
+            strictAudienceRating
+            ownerId
+            reportObject {
+                reportedContentStatus
+                __typename
+            }
+            giphy {
+                mp4
+                gif
+                __typename
+            }
+            audio {
+                filename
+                original {
+                    url
+                    __typename
+                }
+                mp3 {
+                    url
+                    __typename
+                }
+                __typename
+            }
+            __typename
+        }
+
+        ''')
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        data = {
+            'operationName': self._GRAPHQL_OPERATIONNAME,
+            'query': self._GRAPHQL_QUERY,
+            'variables': {
+                '_id': audio_id
+            }
+        }
+
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        json_result = self._download_json('https://api.blerp.com/graphql',
+                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
+
+        bite_json = json_result['data']['web']['biteById']
+
+        info_dict = {
+            'id': bite_json['_id'],
+            'url': bite_json['audio']['mp3']['url'],
+            'title': bite_json['title'],
+            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
+            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
+            'ext': 'mp3',
+            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
+        }
+
+        return info_dict
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f7bb4042f..b8db4c818 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -138,6 +138,7 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
 )
+from .blerp import BlerpIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE

From bc6f94e459694f541a2a1078fad59b02f2fc9d4c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 23:19:03 +0000
Subject: [PATCH 163/312] [FIFA] Back-port extractor from yt-dlp (#31385)

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/fifa.py       | 101 +++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/fifa.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b8db4c818..31a3e588e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -376,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
     FilmOnChannelIE,
diff --git a/youtube_dl/extractor/fifa.py b/youtube_dl/extractor/fifa.py
new file mode 100644
index 000000000..15157774e
--- /dev/null
+++ b/youtube_dl/extractor/fifa.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    int_or_none,
+    traverse_obj,
+    unified_timestamp,
+)
+
+if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
+
+    BaseInfoExtractor = InfoExtractor
+
+    import re
+
+    class InfoExtractor(BaseInfoExtractor):
+
+        @classmethod
+        def _match_valid_url(cls, url):
+            return re.match(cls._VALID_URL, url)
+
+
+class FifaIE(InfoExtractor):
+    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
+        'info_dict': {
+            'id': '7on10qPcnyLajDDU3ntg6y',
+            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
+            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
+            'duration': 8165,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
+        'info_dict': {
+            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
+            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
+            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Highlights'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
+            'duration': 902,
+            'release_timestamp': 1404777600,
+            'release_date': '20140708',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
+        'info_dict': {
+            'id': '3C6gQH9C2DLwzNx7BMRQdp',
+            'title': 'Josimar goal against Northern Ireland | Classic Goals',
+            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Goal'],
+            'duration': 28,
+            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id, locale = self._match_valid_url(url).group('id', 'locale')
+        webpage = self._download_webpage(url, video_id)
+
+        preconnect_link = self._search_regex(
+            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
+
+        video_details = self._download_json(
+            '{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)
+
+        preplay_parameters = self._download_json(
+            '{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']
+
+        content_data = self._download_json(
+            # 1. query string is expected to be sent as-is
+            # 2. `sig` must be appended
+            # 3. if absent, the call appears to work but the manifest is bad (404)
+            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
+            video_id, 'Downloading Content Data')
+
+        # formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
+        formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_details['title'],
+            'description': video_details.get('description'),
+            'duration': int_or_none(video_details.get('duration')),
+            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }

From f316f5d4e391ca40273bce65c67bedc16ae99172 Mon Sep 17 00:00:00 2001
From: afterdelight <39585663+afterdelight@users.noreply.github.com>
Date: Fri, 3 Feb 2023 06:20:14 +0700
Subject: [PATCH 164/312] [xhamster] add support for new domain xhvid.com
 (#31370)

---
 youtube_dl/extractor/xhamster.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index f764021ba..e17947fc6 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -24,7 +24,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -123,6 +123,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -433,6 +436,9 @@ class XHamsterUserIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/users/mobhunter',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/users/pelushe21',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

From 9d17948b5a1cc48bd526b1163292415577131c31 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 2 Feb 2023 23:25:44 +0000
Subject: [PATCH 165/312] [myvideoge] Add new extractor (#31360)

NB download tests on CI servers blocked

Co-authored-by: Alfonso Solbes <fonk666@gmail.com>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/myvideoge.py  | 87 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 youtube_dl/extractor/myvideoge.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 31a3e588e..96b27b179 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -728,6 +728,7 @@ from .myvi import (
     MyviIE,
     MyviEmbedIE,
 )
+from .myvideoge import MyVideoGeIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
diff --git a/youtube_dl/extractor/myvideoge.py b/youtube_dl/extractor/myvideoge.py
new file mode 100644
index 000000000..efbfda7a6
--- /dev/null
+++ b/youtube_dl/extractor/myvideoge.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+    get_element_by_class,
+    int_or_none,
+    js_to_json,
+    MONTH_NAMES,
+    qualities,
+    unified_strdate,
+)
+
+
+class MyVideoGeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.myvideo.ge/v/3941048',
+        'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
+        'info_dict': {
+            'id': '3941048',
+            'ext': 'mp4',
+            'title': 'The best prikol',
+            'upload_date': '20200611',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'chixa33',
+            'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
+        },
+        # working from local dev system
+        'skip': 'site blocks CI servers',
+    }
+    _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
+
+    _quality = staticmethod(qualities(('SD', 'HD')))
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = (
+            self._og_search_title(webpage, default=None)
+            or clean_html(get_element_by_class('my_video_title', webpage))
+            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
+
+        jwplayer_sources = self._parse_json(
+            self._search_regex(
+                r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
+            or '',
+            video_id, transform_source=js_to_json, fatal=False)
+
+        formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
+        for f in formats or []:
+            f['preference'] = self._quality(f['format_id'])
+        self._sort_formats(formats)
+
+        description = (
+            self._og_search_description(webpage)
+            or get_element_by_id('long_desc_holder', webpage)
+            or self._html_search_meta('description', webpage))
+
+        uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+
+        upload_date = get_element_by_class('mv_vid_upl_date', webpage)
+        # as ka locale may not be present roll a local date conversion
+        upload_date = (unified_strdate(
+            # translate any ka month to an en one
+            re.sub('|'.join(self._MONTH_NAMES_KA),
+                   lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
+                   upload_date, re.I))
+            if upload_date else None)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader': uploader,
+            'formats': formats,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'upload_date': upload_date,
+            'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
+            'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
+            'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
+        }

From 384f632e8a9b61e864a26678d85b2b39933b9bae Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 3 Feb 2023 21:10:07 +0000
Subject: [PATCH 166/312] [ITV] Overhaul ITV extractor (#30266)

* support ITVX URLs (thanks Vangelis66)
* support legacy ITV Hub URLs
* include extraction fix 4c57dd2 from sleaux-meaux 3 May 2021
* include extraction fix 6fbcc16, fix by staubichsauger & pukkandan
* work-around duration parsing pending fix to utils.parse_duration
* apply default vanilla UA for pages and media to avoid site blocking
* also detect and report `Episode not found` instead of generic 404
* rework ITVBTCCIE with geo-block detection, best effort geo-restriction handling, news article support
* fix tests
---
 youtube_dl/extractor/itv.py | 382 ++++++++++++++++++++++++++++--------
 1 file changed, 299 insertions(+), 83 deletions(-)

diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index e86c40b42..7026139ea 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -3,123 +3,266 @@ from __future__ import unicode_literals
 
 import json
 import re
+import sys
 
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
+from ..compat import (
+    compat_HTTPError,
+    compat_integer_types,
+    compat_kwargs,
+    compat_urlparse,
+)
 from ..utils import (
     clean_html,
     determine_ext,
+    error_to_compat_str,
     extract_attributes,
-    get_element_by_class,
-    JSON_LD_RE,
+    ExtractorError,
+    get_element_by_attribute,
+    int_or_none,
     merge_dicts,
     parse_duration,
+    parse_iso8601,
+    remove_start,
     smuggle_url,
+    strip_or_none,
+    traverse_obj,
     url_or_none,
+    urljoin,
 )
 
 
-class ITVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
-    _GEO_COUNTRIES = ['GB']
+class ITVBaseIE(InfoExtractor):
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        transform_source = kw.pop('transform_source', None)
+        fatal = kw.pop('fatal', True)
+        return self._parse_json(
+            self._search_regex(
+                r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
+                webpage, 'next.js data', group='js', fatal=fatal, **kw),
+            video_id, transform_source=transform_source, fatal=fatal)
+
+    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
+        if errnote is False:
+            return False
+        if errnote is None:
+            errnote = 'Unable to download webpage'
+
+        errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+        if fatal:
+            raise ExtractorError(errmsg, sys.exc_info()[2], cause=err, video_id=video_id)
+        else:
+            self._downloader.report_warning(errmsg)
+            return False
+
+    @staticmethod
+    def _vanilla_ua_header():
+        return {'User-agent': 'Mozilla/5.0'}
+
+    def _download_webpage_handle(self, url, video_id, *args, **kwargs):
+        # specialised to (a) use vanilla UA (b) detect geo-block
+        params = self._downloader.params
+        nkwargs = {}
+        if (
+                'user_agent' not in params
+                and not any(re.match(r'(?i)user-agent\s*:', h)
+                            for h in (params.get('headers') or []))
+                and 'User-agent' not in (kwargs.get('headers') or {})):
+
+            kwargs.setdefault('headers', {})
+            kwargs['headers'] = self._vanilla_ua_header()
+            nkwargs = kwargs
+        if kwargs.get('expected_status') is not None:
+            exp = kwargs['expected_status']
+            if isinstance(exp, compat_integer_types):
+                exp = [exp]
+            if isinstance(exp, (list, tuple)) and 403 not in exp:
+                kwargs['expected_status'] = [403]
+                kwargs['expected_status'].extend(exp)
+                nkwargs = kwargs
+        else:
+            kwargs['expected_status'] = 403
+            nkwargs = kwargs
+
+        if nkwargs:
+            kwargs = compat_kwargs(kwargs)
+
+        ret = super(ITVBaseIE, self)._download_webpage_handle(url, video_id, *args, **kwargs)
+        if ret is False:
+            return ret
+        webpage, urlh = ret
+
+        if urlh.getcode() == 403:
+            # geo-block error is like this, with an unnecessary 'Of':
+            # '{\n  "Message" : "Request Originated Outside Of Allowed Geographic Region",\
+            # \n  "TransactionId" : "oas-magni-475082-xbYF0W"\n}'
+            if '"Request Originated Outside Of Allowed Geographic Region"' in webpage:
+                self.raise_geo_restricted(countries=['GB'])
+            ret = self.__handle_request_webpage_error(
+                compat_HTTPError(urlh.geturl(), 403, 'HTTP Error 403: Forbidden', urlh.headers, urlh),
+                fatal=kwargs.get('fatal'))
+
+        return ret
+
+
+class ITVIE(ITVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
+    _IE_DESC = 'ITVX'
     _TESTS = [{
+        'note': 'Hub URLs redirect to ITVX',
         'url': 'https://www.itv.com/hub/liar/2a4547a0012',
-        'info_dict': {
-            'id': '2a4547a0012',
-            'ext': 'mp4',
-            'title': 'Liar - Series 2 - Episode 6',
-            'description': 'md5:d0f91536569dec79ea184f0a44cca089',
-            'series': 'Liar',
-            'season_number': 2,
-            'episode_number': 6,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
+        'only_matching': True,
     }, {
-        # unavailable via data-playlist-url
+        'note': 'Hub page unavailable via data-playlist-url (404 now)',
         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
         'only_matching': True,
     }, {
-        # InvalidVodcrid
+        'note': 'Hub page with InvalidVodcrid (404 now)',
         'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
         'only_matching': True,
     }, {
-        # ContentUnavailable
+        'note': 'Hub page with ContentUnavailable (404 now)',
         'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
         'only_matching': True,
-    }]
+    }, {
+        'note': 'ITVX, or itvX, show',
+        'url': 'https://www.itv.com/watch/vera/1a7314/1a7314a0014',
+        'md5': 'bd0ad666b2c058fffe7d036785880064',
+        'info_dict': {
+            'id': '1a7314a0014',
+            'ext': 'mp4',
+            'title': 'Vera - Series 3 - Episode 4 - Prodigal Son',
+            'description': 'Vera and her team investigate the fatal stabbing of an ex-Met police officer outside a busy Newcastle nightclub - but there aren\'t many clues.',
+            'timestamp': 1653591600,
+            'upload_date': '20220526',
+            'uploader': 'ITVX',
+            'thumbnail': r're:https://\w+\.itv\.com/images/(?:\w+/)+\d+x\d+\?',
+            'duration': 5340.8,
+            'age_limit': 16,
+            'series': 'Vera',
+            'series_number': 3,
+            'episode': 'Prodigal Son',
+            'episode_number': 4,
+            'channel': 'ITV3',
+            'categories': list,
+        },
+        'params': {
+            # m3u8 download
+            # 'skip_download': True,
+        },
+        'skip': 'only available in UK',
+    }, {
+        'note': 'Latest ITV news bulletin: details change daily',
+        'url': 'https://www.itv.com/watch/news/varies-but-is-not-checked/6js5d0f',
+        'info_dict': {
+            'id': '6js5d0f',
+            'ext': 'mp4',
+            'title': r're:The latest ITV News headlines - \S.+',
+            'description': r'''re:.* today's top stories from the ITV News team.$''',
+            'timestamp': int,
+            'upload_date': r're:2\d\d\d(?:0[1-9]|1[0-2])(?:[012][1-9]|3[01])',
+            'uploader': 'ITVX',
+            'thumbnail': r're:https://images\.ctfassets\.net/(?:\w+/)+[\w.]+\.(?:jpg|png)',
+            'duration': float,
+            'age_limit': None,
+        },
+        'params': {
+            # variable download
+            # 'skip_download': True,
+        },
+        'skip': 'only available in UK',
+    }
+    ]
+
+    def _og_extract(self, webpage, require_title=False):
+        return {
+            'title': self._og_search_title(webpage, fatal=require_title),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'uploader': self._og_search_property('site_name', webpage, default=None),
+        }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        params = extract_attributes(self._search_regex(
-            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
 
-        ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
-        hmac = params['data-video-hmac']
+        webpage = self._download_webpage(url, video_id)
+
+        # now quite different params!
+        params = extract_attributes(self._search_regex(
+            r'''(<[^>]+\b(?:class|data-testid)\s*=\s*("|')genie-container\2[^>]*>)''',
+            webpage, 'params'))
+
+        ios_playlist_url = traverse_obj(
+            params, 'data-video-id', 'data-video-playlist',
+            get_all=False, expected_type=url_or_none)
+
         headers = self.geo_verification_headers()
         headers.update({
             'Accept': 'application/vnd.itv.vod.playlist.v2+json',
             'Content-Type': 'application/json',
-            'hmac': hmac.upper(),
         })
         ios_playlist = self._download_json(
             ios_playlist_url, video_id, data=json.dumps({
                 'user': {
-                    'itvUserId': '',
                     'entitlements': [],
-                    'token': ''
                 },
                 'device': {
-                    'manufacturer': 'Safari',
-                    'model': '5',
+                    'manufacturer': 'Mobile Safari',
+                    'model': '5.1',
                     'os': {
-                        'name': 'Windows NT',
-                        'version': '6.1',
-                        'type': 'desktop'
+                        'name': 'iOS',
+                        'version': '5.0',
+                        'type': ' mobile'
                     }
                 },
                 'client': {
                     'version': '4.1',
-                    'id': 'browser'
+                    'id': 'browser',
+                    'supportsAdPods': True,
+                    'service': 'itv.x',
+                    'appversion': '2.43.28',
                 },
                 'variantAvailability': {
+                    'player': 'hls',
                     'featureset': {
                         'min': ['hls', 'aes', 'outband-webvtt'],
                         'max': ['hls', 'aes', 'outband-webvtt']
                     },
-                    'platformTag': 'dotcom'
+                    'platformTag': 'mobile'
                 }
             }).encode(), headers=headers)
         video_data = ios_playlist['Playlist']['Video']
-        ios_base_url = video_data.get('Base')
+        ios_base_url = traverse_obj(video_data, 'Base', expected_type=url_or_none)
+
+        media_url = (
+            (lambda u: url_or_none(urljoin(ios_base_url, u)))
+            if ios_base_url else url_or_none)
 
         formats = []
-        for media_file in (video_data.get('MediaFiles') or []):
-            href = media_file.get('Href')
+        for media_file in traverse_obj(video_data, 'MediaFiles', expected_type=list) or []:
+            href = traverse_obj(media_file, 'Href', expected_type=media_url)
             if not href:
                 continue
-            if ios_base_url:
-                href = ios_base_url + href
             ext = determine_ext(href)
             if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    href, video_id, 'mp4', entry_protocol='m3u8_native',
+                    href, video_id, 'mp4', entry_protocol='m3u8',
                     m3u8_id='hls', fatal=False))
+
             else:
                 formats.append({
                     'url': href,
                 })
         self._sort_formats(formats)
+        for f in formats:
+            f.setdefault('http_headers', {})
+            f['http_headers'].update(self._vanilla_ua_header())
 
         subtitles = {}
-        subs = video_data.get('Subtitles') or []
-        for sub in subs:
-            if not isinstance(sub, dict):
-                continue
-            href = url_or_none(sub.get('Href'))
+        for sub in traverse_obj(video_data, 'Subtitles', expected_type=list) or []:
+            href = traverse_obj(sub, 'Href', expected_type=url_or_none)
             if not href:
                 continue
             subtitles.setdefault('en', []).append({
@@ -127,59 +270,132 @@ class ITVIE(InfoExtractor):
                 'ext': determine_ext(href, 'vtt'),
             })
 
-        info = self._search_json_ld(webpage, video_id, default={})
-        if not info:
-            json_ld = self._parse_json(self._search_regex(
-                JSON_LD_RE, webpage, 'JSON-LD', '{}',
-                group='json_ld'), video_id, fatal=False)
-            if json_ld and json_ld.get('@type') == 'BreadcrumbList':
-                for ile in (json_ld.get('itemListElement:') or []):
-                    item = ile.get('item:') or {}
-                    if item.get('@type') == 'TVEpisode':
-                        item['@context'] = 'http://schema.org'
-                        info = self._json_ld(item, video_id, fatal=False) or {}
-                        break
+        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
+        video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
+        title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
+        info = self._og_extract(webpage, require_title=not title)
+        tn = info.pop('thumbnail', None)
+        if tn:
+            info['thumbnails'] = [{'url': tn}]
+
+        # num. episode title
+        num_ep_title = video_data.get('numberedEpisodeTitle')
+        if not num_ep_title:
+            num_ep_title = clean_html(get_element_by_attribute('data-testid', 'episode-hero-description-strong', webpage))
+            num_ep_title = num_ep_title and num_ep_title.rstrip(' -')
+        ep_title = strip_or_none(
+            video_data.get('episodeTitle')
+            or (num_ep_title.split('.', 1)[-1] if num_ep_title else None))
+        title = title or re.sub(r'\s+-\s+ITVX$', '', info['title'])
+        if ep_title and ep_title != title:
+            title = title + ' - ' + ep_title
+
+        def get_thumbnails():
+            tns = []
+            for w, x in (traverse_obj(video_data, ('imagePresets'), expected_type=dict) or {}).items():
+                if isinstance(x, dict):
+                    for y, z in x.items():
+                        tns.append({'id': w + '_' + y, 'url': z})
+            return tns or None
+
+        video_str = lambda *x: traverse_obj(
+            video_data, *x, get_all=False, expected_type=strip_or_none)
 
         return merge_dicts({
             'id': video_id,
-            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
+            'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'duration': parse_duration(video_data.get('Duration')),
-            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
+            # parsing hh:mm:ss:nnn not yet patched
+            'duration': parse_duration(re.sub(r'(\d{2})(:)(\d{3}$)', r'\1.\3', video_data.get('Duration') or '')),
+            'description': video_str('synopsis'),
+            'timestamp': traverse_obj(video_data, 'broadcastDateTime', 'dateTime', expected_type=parse_iso8601),
+            'thumbnails': get_thumbnails(),
+            'series': video_str('showTitle', 'programmeTitle'),
+            'series_number': int_or_none(video_data.get('seriesNumber')),
+            'episode': ep_title,
+            'episode_number': int_or_none((num_ep_title or '').split('.')[0]),
+            'channel': video_str('channel'),
+            'categories': traverse_obj(video_data, ('categories', 'formatted'), expected_type=list),
+            'age_limit': {False: 16, True: 0}.get(video_data.get('isChildrenCategory')),
         }, info)
 
 
-class ITVBTCCIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
-        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+class ITVBTCCIE(ITVBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _IE_DESC = 'ITV articles: News, British Touring Car Championship'
+    _TESTS = [{
+        'note': 'British Touring Car Championship',
+        'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
         'info_dict': {
             'id': 'btcc-2018-all-the-action-from-brands-hatch',
             'title': 'BTCC 2018: All the action from Brands Hatch',
         },
         'playlist_mincount': 9,
-    }
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+    }, {
+        'note': 'redirects to /btcc/articles/...',
+        'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
+        'only_matching': True,
+    }, {
+        'note': 'news article',
+        'url': 'https://www.itv.com/news/wales/2020-07-23/sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
+        'info_dict': {
+            'id': 'sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
+            'title': '''Sean Fletcher on why Wales' coastline should be your 'staycation' destination | ITV News''',
+        },
+        'playlist_mincount': 1,
+    }]
+
+    # should really be a class var of the BC IE
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+    BRIGHTCOVE_ACCOUNT = '1582188683001'
+    BRIGHTCOVE_PLAYER = 'HkiHLnNRx'
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, playlist_id)
+        webpage, urlh = self._download_webpage_handle(url, playlist_id)
+        link = compat_urlparse.urlparse(urlh.geturl()).path.strip('/')
 
-        entries = [
-            self.url_result(
-                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
-                    # ITV does not like some GB IP ranges, so here are some
-                    # IP blocks it accepts
-                    'geo_ip_blocks': [
-                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
-                    ],
-                    'referrer': url,
-                }),
-                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
-            for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
+        next_data = self._search_nextjs_data(webpage, playlist_id, fatal=False, default='{}')
+        path_prefix = compat_urlparse.urlparse(next_data.get('assetPrefix') or '').path.strip('/')
+        link = remove_start(link, path_prefix).strip('/')
+
+        content = traverse_obj(
+            next_data, ('props', 'pageProps', Ellipsis),
+            expected_type=lambda x: x if x['link'] == link else None,
+            get_all=False, default={})
+        content = traverse_obj(
+            content, ('body', 'content', Ellipsis, 'data'),
+            expected_type=lambda x: x if x.get('name') == 'Brightcove' or x.get('type') == 'Brightcove' else None)
+
+        contraband = {
+            # ITV does not like some GB IP ranges, so here are some
+            # IP blocks it accepts
+            'geo_ip_blocks': [
+                '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
+            ],
+            'referrer': urlh.geturl(),
+        }
+
+        def entries():
+
+            for data in content or []:
+                video_id = data.get('id')
+                if not video_id:
+                    continue
+                account = data.get('accountId') or self.BRIGHTCOVE_ACCOUNT
+                player = data.get('playerId') or self.BRIGHTCOVE_PLAYER
+                yield self.url_result(
+                    smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id), contraband),
+                    ie=BrightcoveNewIE.ie_key(), video_id=video_id)
+
+            # obsolete ?
+            for video_id in re.findall(r'''data-video-id=["'](\d+)''', webpage):
+                yield self.url_result(
+                    smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (self.BRIGHTCOVE_ACCOUNT, self.BRIGHTCOVE_PLAYER, video_id), contraband),
+                    ie=BrightcoveNewIE.ie_key(), video_id=video_id)
 
         title = self._og_search_title(webpage, fatal=False)
 
-        return self.playlist_result(entries, playlist_id, title)
+        return self.playlist_result(entries(), playlist_id, title)

From d947ffe8e385a541f44c6125b4cbc269de6055a4 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 00:19:48 +0000
Subject: [PATCH 167/312] [IGN] Overhaul extractor to avoid URL redirection
 loop

Consequently/also:
* centralise video data extraction
* detect 404 and 503 expected errors
* handle the test video in IGNVideo
* handle two additional page formats for the tests in IGNArticle
---
 youtube_dl/extractor/ign.py | 347 ++++++++++++++++++++++++++----------
 1 file changed, 252 insertions(+), 95 deletions(-)

diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py
index 0d9f50ed2..c7daa30e5 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -1,19 +1,29 @@
+# coding: utf-8
+
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_filter as filter,
+    compat_HTTPError,
     compat_parse_qs,
-    compat_urllib_parse_urlparse,
+    compat_urlparse,
 )
 from ..utils import (
-    HEADRequest,
     determine_ext,
+    error_to_compat_str,
+    extract_attributes,
+    ExtractorError,
     int_or_none,
+    merge_dicts,
+    orderedSet,
     parse_iso8601,
     strip_or_none,
-    try_get,
+    traverse_obj,
+    url_or_none,
+    urljoin,
 )
 
 
@@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
         return self._download_json(
             'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
 
+    def _checked_call_api(self, slug):
+        try:
+            return self._call_api(slug)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                e.cause.args = e.cause.args or [
+                    e.cause.geturl(), e.cause.getcode(), e.cause.reason]
+                raise ExtractorError(
+                    'Content not found: expired?', cause=e.cause,
+                    expected=True)
+            raise
+
+    def _extract_video_info(self, video, fatal=True):
+        video_id = video['videoId']
+
+        formats = []
+        refs = traverse_obj(video, 'refs', expected_type=dict) or {}
+
+        m3u8_url = url_or_none(refs.get('m3uUrl'))
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
+
+        f4m_url = url_or_none(refs.get('f4mUrl'))
+        if f4m_url:
+            formats.extend(self._extract_f4m_formats(
+                f4m_url, video_id, f4m_id='hds', fatal=False))
+
+        for asset in (video.get('assets') or []):
+            asset_url = url_or_none(asset.get('url'))
+            if not asset_url:
+                continue
+            formats.append({
+                'url': asset_url,
+                'tbr': int_or_none(asset.get('bitrate'), 1000),
+                'fps': int_or_none(asset.get('frame_rate')),
+                'height': int_or_none(asset.get('height')),
+                'width': int_or_none(asset.get('width')),
+            })
+
+        mezzanine_url = traverse_obj(
+            video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
+        if mezzanine_url:
+            formats.append({
+                'ext': determine_ext(mezzanine_url, 'mp4'),
+                'format_id': 'mezzanine',
+                'preference': 1,
+                'url': mezzanine_url,
+            })
+
+        if formats or fatal:
+            self._sort_formats(formats)
+        else:
+            return
+
+        thumbnails = traverse_obj(
+            video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
+        tags = traverse_obj(
+            video, ('tags', Ellipsis, 'displayName'),
+            expected_type=lambda x: x.strip() or None)
+
+        metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
+        title = traverse_obj(
+            metadata, 'longTitle', 'title', 'name',
+            expected_type=lambda x: x.strip() or None)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': strip_or_none(metadata.get('description')),
+            'timestamp': parse_iso8601(metadata.get('publishDate')),
+            'duration': int_or_none(metadata.get('duration')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'tags': tags,
+        }
+
+    # yt-dlp shim
+    @classmethod
+    def _extract_from_webpage(cls, url, webpage):
+        for embed_url in orderedSet(
+                cls._extract_embed_urls(url, webpage) or [], lazy=True):
+            yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
+
 
 class IGNIE(IGNBaseIE):
     """
     Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
     Some videos of it.ign.com are also supported
     """
-
-    _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
+    _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
+    _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
+    _VALID_URL = (
+        r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
+        % '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
     IE_NAME = 'ign.com'
     _PAGE_TYPE = 'video'
 
@@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
             'timestamp': 1370440800,
             'upload_date': '20130605',
             'tags': 'count:9',
-        }
+        },
+        'params': {
+            'nocheckcertificate': True,
+        },
     }, {
         'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
         'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
@@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
             'timestamp': 1420571160,
             'upload_date': '20150106',
             'tags': 'count:4',
-        }
+        },
+        'skip': '404 Not Found',
     }, {
         'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
         'only_matching': True,
     }]
 
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        grids = re.findall(
+            r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
+            webpage)
+        return filter(None,
+                      (urljoin(url, m.group('path')) for m in re.finditer(
+                          r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
+                          % cls._VIDEO_PATH_RE, grids[0] if grids else '')))
+
     def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        display_id = m.group('id')
+        if display_id:
+            return self._extract_video(url, display_id)
+        display_id = m.group('filt') or 'all'
+        return self._extract_playlist(url, display_id)
+
+    def _extract_playlist(self, url, display_id):
+        webpage = self._download_webpage(url, display_id)
+
+        return self.playlist_result(
+            (self.url_result(u, ie=self.ie_key())
+             for u in self._extract_embed_urls(url, webpage)),
+            playlist_id=display_id)
+
+    def _extract_video(self, url, display_id):
         display_id = self._match_id(url)
-        video = self._call_api(display_id)
-        video_id = video['videoId']
-        metadata = video['metadata']
-        title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
+        video = self._checked_call_api(display_id)
 
-        formats = []
-        refs = video.get('refs') or {}
+        info = self._extract_video_info(video)
 
-        m3u8_url = refs.get('m3uUrl')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
-
-        f4m_url = refs.get('f4mUrl')
-        if f4m_url:
-            formats.extend(self._extract_f4m_formats(
-                f4m_url, video_id, f4m_id='hds', fatal=False))
-
-        for asset in (video.get('assets') or []):
-            asset_url = asset.get('url')
-            if not asset_url:
-                continue
-            formats.append({
-                'url': asset_url,
-                'tbr': int_or_none(asset.get('bitrate'), 1000),
-                'fps': int_or_none(asset.get('frame_rate')),
-                'height': int_or_none(asset.get('height')),
-                'width': int_or_none(asset.get('width')),
-            })
-
-        mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
-        if mezzanine_url:
-            formats.append({
-                'ext': determine_ext(mezzanine_url, 'mp4'),
-                'format_id': 'mezzanine',
-                'preference': 1,
-                'url': mezzanine_url,
-            })
-
-        self._sort_formats(formats)
-
-        thumbnails = []
-        for thumbnail in (video.get('thumbnails') or []):
-            thumbnail_url = thumbnail.get('url')
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'url': thumbnail_url,
-            })
-
-        tags = []
-        for tag in (video.get('tags') or []):
-            display_name = tag.get('displayName')
-            if not display_name:
-                continue
-            tags.append(display_name)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': strip_or_none(metadata.get('description')),
-            'timestamp': parse_iso8601(metadata.get('publishDate')),
-            'duration': int_or_none(metadata.get('duration')),
+        return merge_dicts({
             'display_id': display_id,
-            'thumbnails': thumbnails,
-            'formats': formats,
-            'tags': tags,
-        }
+        }, info)
 
 
-class IGNVideoIE(InfoExtractor):
+class IGNVideoIE(IGNBaseIE):
     _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
     _TESTS = [{
         'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
@@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
             'description': 'Taking out assassination targets in Hitman has never been more stylish.',
             'timestamp': 1444665600,
             'upload_date': '20151012',
-        }
+        },
+        'expected_warnings': ['HTTP Error 400: Bad Request'],
     }, {
         'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
         'only_matching': True,
@@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
-        url = self._request_webpage(req, video_id).geturl()
+        parsed_url = compat_urlparse.urlparse(url)
+        embed_url = compat_urlparse.urlunparse(
+            parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
+
+        webpage, urlh = self._download_webpage_handle(embed_url, video_id)
+        new_url = urlh.geturl()
         ign_url = compat_parse_qs(
-            compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
+            compat_urlparse.urlparse(new_url).query).get('url', [None])[-1]
         if ign_url:
             return self.url_result(ign_url, IGNIE.ie_key())
-        return self.url_result(url)
+        video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
+        if not video:
+            if new_url == url:
+                raise ExtractorError('Redirect loop: ' + url)
+            return self.url_result(new_url)
+        video = extract_attributes(video)
+        video_data = video.get('data-settings') or '{}'
+        video_data = self._parse_json(video_data, video_id)['video']
+        info = self._extract_video_info(video_data)
+
+        return merge_dicts({
+            'display_id': video_id,
+        }, info)
 
 
 class IGNArticleIE(IGNBaseIE):
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
     _PAGE_TYPE = 'article'
     _TESTS = [{
         'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
         'info_dict': {
-            'id': '524497489e4e8ff5848ece34',
+            'id': '72113',
             'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
         },
         'playlist': [
@@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
                 'info_dict': {
                     'id': '5ebbd138523268b93c9141af17bec937',
                     'ext': 'mp4',
-                    'title': 'GTA 5 Video Review',
+                    'title': 'Grand Theft Auto V Video Review',
                     'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                     'timestamp': 1379339880,
                     'upload_date': '20130916',
@@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
                 'info_dict': {
                     'id': '638672ee848ae4ff108df2a296418ee2',
                     'ext': 'mp4',
-                    'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                    'title': 'GTA 5 In Slow Motion',
                     'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                     'timestamp': 1386878820,
                     'upload_date': '20131212',
@@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
             },
         ],
         'params': {
-            'playlist_items': '2-3',
             'skip_download': True,
         },
+        'expected_warnings': ['Backend fetch failed'],
     }, {
         'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
         'info_dict': {
             'id': '53ee806780a81ec46e0790f8',
             'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
         },
-        'playlist_count': 2,
+        'playlist_count': 1,
+        'expected_warnings': ['Backend fetch failed'],
     }, {
         # videoId pattern
         'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
@@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
         'only_matching': True,
     }]
 
+    def _checked_call_api(self, slug):
+        try:
+            return self._call_api(slug)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                e.cause.args = e.cause.args or [
+                    e.cause.geturl(), e.cause.getcode(), e.cause.reason]
+                if e.cause.code == 404:
+                    raise ExtractorError(
+                        'Content not found: expired?', cause=e.cause,
+                        expected=True)
+                elif e.cause.code == 503:
+                    self.report_warning(error_to_compat_str(e.cause))
+                    return
+            raise
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
     def _real_extract(self, url):
         display_id = self._match_id(url)
-        article = self._call_api(display_id)
+        article = self._checked_call_api(display_id)
 
-        def entries():
-            media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
-            if media_url:
-                yield self.url_result(media_url, IGNIE.ie_key())
-            for content in (article.get('content') or []):
-                for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
-                    yield self.url_result(video_url)
+        if article:
+            # obsolete ?
+            def entries():
+                media_url = traverse_obj(
+                    article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
+                    expected_type=url_or_none)
+                if media_url:
+                    yield self.url_result(media_url, IGNIE.ie_key())
+                for content in (article.get('content') or []):
+                    for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
+                        if url_or_none(video_url):
+                            yield self.url_result(video_url)
+
+            return self.playlist_result(
+                entries(), article.get('articleId'),
+                traverse_obj(
+                    article, ('metadata', 'headline'),
+                    expected_type=lambda x: x.strip() or None))
+
+        webpage = self._download_webpage(url, display_id)
+
+        playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
+        if playlist_id:
+
+            def entries():
+                for m in re.finditer(
+                        r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
+                        webpage):
+                    flashvars = self._search_regex(
+                        r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
+                        m.group('params'), 'flashvars', default='')
+                    flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
+                    v_url = url_or_none((flashvars.get('url') or [None])[-1])
+                    if v_url:
+                        yield self.url_result(v_url)
+        else:
+            playlist_id = self._search_regex(
+                r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
+                webpage, 'id', group='id', default=None)
+
+            nextjs_data = self._search_nextjs_data(webpage, display_id)
+
+            def entries():
+                for player in traverse_obj(
+                        nextjs_data,
+                        ('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
+                    # skip promo links (which may not always be served, eg GH CI servers)
+                    if traverse_obj(nextjs_data,
+                                    ('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
+                                    expected_type=dict):
+                        continue
+                    video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
+                    info = self._extract_video_info(video, fatal=False)
+                    if info:
+                        yield merge_dicts({
+                            'display_id': display_id,
+                        }, info)
 
         return self.playlist_result(
-            entries(), article.get('articleId'),
-            strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
+            entries(), playlist_id or display_id,
+            re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)

From cd987e6fca336cf6570b4938442c23cd0bdf7256 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 01:53:47 +0000
Subject: [PATCH 168/312] [jsinterp] Nits

---
 youtube_dl/jsinterp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 1e7b342ac..60fa2b1b9 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -201,7 +201,7 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
+                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     class JS_RegExp(object):
@@ -699,7 +699,7 @@ class JSInterpreter(object):
                 """ assert, but without risk of getting optimized out """
                 if not cndn:
                     memb = member
-                    raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
+                    raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
 
             def eval_method():
                 if (variable, member) == ('console', 'debug'):

From f2f90887ca7a452dfafa7ca221fe981a4ec56707 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 00:21:35 +0000
Subject: [PATCH 169/312] [Vimeo] Fix `Unable to extract info section` redux *
 as reported in yt-dlp/yt-dlp#6149 * also allow newline in target JSON object

---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 853b38402..14f8dd034 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
         if '//player.vimeo.com/video/' in url:
             config = self._parse_json(self._search_regex(
-                r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+                r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
             if config.get('view') == 4:
                 config = self._verify_player_video_password(
                     redirect_url, video_id, headers)

From e19ec5232216fd801ded88728df5b50bfb05c1cc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Feb 2023 03:25:14 +0000
Subject: [PATCH 170/312] [Vimeo] Support /user{video_id}/{slug} URL format

---
 youtube_dl/extractor/vimeo.py | 55 +++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 14f8dd034..7f2731d83 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
     # _VALID_URL matches Vimeo URLs
     _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            (?:
-                                www|
-                                player
-                            )
-                            \.
-                        )?
-                        vimeo(?:pro)?\.com/
-                        (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
-                        (?:.*?/)??
-                        (?:
-                            (?:
-                                play_redirect_hls|
-                                moogaloop\.swf)\?clip_id=
-                            )?
-                        (?:videos?/)?
-                        (?P<id>[0-9]+)
-                        (?:/(?P<unlisted_hash>[\da-f]{10}))?
-                        /?(?:[?&].*)?(?:[#].*)?$
-                    '''
+                     https?://
+                         (?:
+                             (?:
+                                 www|
+                                 player
+                             )
+                             \.
+                         )?
+                         vimeo(?:pro)?\.com/
+                         (?:
+                             (?P<u>user)|
+                             (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                             (?:.*?/)??
+                             (?P<q>
+                                 (?:
+                                     play_redirect_hls|
+                                     moogaloop\.swf)\?clip_id=
+                             )?
+                             (?:videos?/)?
+                         )
+                         (?P<id>[0-9]+)
+                         (?(u)
+                             /(?!videos|likes)[^/?#]+/?|
+                             (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
+                         )
+                         (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                 '''
     IE_NAME = 'vimeo'
     _TESTS = [
         {
@@ -539,7 +545,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
-        }
+        },
+        {
+            # user playlist alias -> https://vimeo.com/258705797
+            'url': 'https://vimeo.com/user26785108/newspiritualguide',
+            'only_matching': True,
+        },
         # https://gettingthingsdone.com/workflowmap/
         # vimeo embed with check-password page protected by Referer header
     ]

From 58988c1421b88875a33015b08e4d2ada43021e09 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 31 Jan 2022 04:28:54 +0000
Subject: [PATCH 171/312] [YouTube] Bypass age-gating for certain restricted
 videos

* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client

* Also add and fix tests

* Introduce and use new utility function `update_url()`
---
 youtube_dl/extractor/youtube.py | 202 +++++++++++++++++++++++++-------
 youtube_dl/utils.py             |  11 ++
 2 files changed, 168 insertions(+), 45 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 28fdb086a..65428528d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -42,6 +42,7 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    update_url,
     update_url_query,
     url_or_none,
     urlencode_postdata,
@@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 
-    def _call_api(self, ep, query, video_id, fatal=True):
+    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
+        real_headers = {'content-type': 'application/json'}
+        if headers:
+            real_headers.update(headers)
 
         return self._download_json(
             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
             note='Downloading API JSON', errnote='Unable to download API page',
             data=json.dumps(data).encode('utf8'), fatal=fatal,
-            headers={'content-type': 'application/json'},
+            headers=real_headers,
             query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
 
     def _extract_yt_initial_data(self, video_id, webpage):
@@ -515,6 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_id': 'phihag',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                 'upload_date': '20121002',
@@ -524,10 +529,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 10,
                 'view_count': int,
                 'like_count': int,
-                'dislike_count': int,
+                'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
                 'start_time': 1,
                 'end_time': 9,
-            }
+            },
         },
         {
             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
@@ -562,7 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 10,
                 'view_count': int,
                 'like_count': int,
-                'dislike_count': int,
             },
             'params': {
                 'skip_download': True,
@@ -621,8 +625,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
         },
-        # Normal age-gate video (No vevo, embed allowed), available via embed page
+        # Age-gated videos
         {
+            'note': 'Age-gated video (No vevo, embed allowed)',
             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
             'info_dict': {
                 'id': 'HtVdAasjOgU',
@@ -631,17 +636,97 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
-                'uploader_id': 'WitcherGame',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
+                'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
+                'categories': ['Gaming'],
+                'tags': 'count:17',
+                'channel': 'The Witcher',
+                'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
+                'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
+                'view_count': int,
+                'like_count': int,
             },
         },
         {
-            # Age-gated video only available with authentication (unavailable
-            # via embed page workaround)
+            'note': 'Age-gated video with embed allowed in public site',
+            'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
+            'info_dict': {
+                'id': 'HsUATh_Nc2U',
+                'ext': 'mp4',
+                'title': 'Godzilla 2 (Official Video)',
+                'description': 'md5:bf77e03fcae5529475e500129b05668a',
+                'duration': 177,
+                'uploader': 'FlyingKitty',
+                'upload_date': '20200408',
+                'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
+                'age_limit': 18,
+                'categories': ['Entertainment'],
+                'tags': ['Flyingkitty', 'godzilla 2'],
+                'channel': 'FlyingKitty',
+                'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
+                'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Age-gated video embedable only with clientScreen=EMBED',
+            'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
+            'info_dict': {
+                'id': 'Tq92D6wQ1mg',
+                'ext': 'mp4',
+                'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+                'description': 'md5:17eccca93a786d51bc67646756894066',
+                'duration': 106,
+                'uploader': 'Projekt Melody',
+                'upload_date': '20191227',
+                'age_limit': 18,
+                'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
+                'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+                'categories': ['Entertainment'],
+                'channel': 'Projekt Melody',
+                'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Non-Age-gated non-embeddable video',
+            'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
+            'info_dict': {
+                'id': 'MeJVWBSsPAY',
+                'ext': 'mp4',
+                'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
+                'description': 'Fan Video. Music & Lyrics by OOMPH!.',
+                'duration': 210,
+                'uploader': 'Herr Lurik',
+                'uploader_id': 'st3in234',
+                'upload_date': '20130730',
+                'uploader_url': 'http://www.youtube.com/user/st3in234',
+                'age_limit': 0,
+                'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
+                'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
+                'categories': ['Music'],
+                'channel': 'Herr Lurik',
+                'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
+                'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
+                'artist': 'OOMPH!',
+                'view_count': int,
+                'like_count': int,
+            },
+        },
+        {
+            'note': 'Non-bypassable age-gated video',
+            'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
+            'only_matching': True,
+        },
+        {
+            'note': 'Age-gated video only available with authentication (not via embed workaround)',
             'url': 'XgnwCQzjau8',
             'only_matching': True,
+            'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
         },
         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
         # YouTube Red ad is not captured for creator
@@ -670,17 +755,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'info_dict': {
                 'id': 'lqQg6PlCWgI',
                 'ext': 'mp4',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+                'description': r're:(?s)(?:.+\s)?HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
                 'duration': 6085,
                 'upload_date': '20150827',
                 'uploader_id': 'olympic',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
-                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
-                'uploader': 'Olympic',
-                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+                'uploader': r're:Olympics?',
+                'age_limit': 0,
+                'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
+                'categories': ['Sports'],
+                'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
+                'channel': 'Olympics',
+                'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
+                'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
+                'view_count': int,
+                'like_count': int,
             },
-            'params': {
-                'skip_download': 'requires avconv',
-            }
         },
         # Non-square pixels
         {
@@ -1683,27 +1774,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             player_response = self._call_api(
                 'player', {'videoId': video_id}, video_id)
 
-        playability_status = player_response.get('playabilityStatus') or {}
-        if playability_status.get('reason') == 'Sign in to confirm your age':
-            video_info = self._download_webpage(
-                base_url + 'get_video_info', video_id,
-                'Refetching age-gated info webpage',
-                'unable to download video info webpage', query={
-                    'video_id': video_id,
-                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                    'html5': 1,
-                    # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
-                    'c': 'TVHTML5',
-                    'cver': '6.20180913',
-                }, fatal=False)
-            if video_info:
-                pr = self._parse_json(
-                    try_get(
-                        compat_parse_qs(video_info),
-                        lambda x: x['player_response'][0], compat_str) or '{}',
-                    video_id, fatal=False)
-                if pr and isinstance(pr, dict):
-                    player_response = pr
+        def is_agegated(playability):
+            if not isinstance(playability, dict):
+                return
+
+            if playability.get('desktopLegacyAgeGateReason'):
+                return True
+
+            reasons = filter(None, (playability.get(r) for r in ('status', 'reason')))
+            AGE_GATE_REASONS = (
+                'confirm your age', 'age-restricted', 'inappropriate',  # reason
+                'age_verification_required', 'age_check_required',  # status
+            )
+            return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
+
+        def get_playability_status(response):
+            return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
+
+        playability_status = get_playability_status(player_response)
+        if (is_agegated(playability_status)
+                and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
+
+            self.report_age_confirmation()
+
+            # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
+            pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+            query = {
+                'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
+                'contentCheckOk': True,
+                'racyCheckOk': True,
+                'context': {
+                    'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
+                    'thirdParty': {'embedUrl': 'https://google.com'},
+                },
+                'videoId': video_id,
+            }
+            headers = {
+                'X-YouTube-Client-Name': '85',
+                'X-YouTube-Client-Version': '2.0',
+                'Origin': 'https://www.youtube.com'
+            }
+
+            video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
+            age_gate_status = get_playability_status(video_info)
+            if age_gate_status.get('status') == 'OK':
+                player_response = video_info
+                playability_status = age_gate_status
 
         trailer_video_id = try_get(
             playability_status,
@@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             for thumbnail in (try_get(
                     container,
                     lambda x: x['thumbnail']['thumbnails'], list) or []):
-                thumbnail_url = thumbnail.get('url')
+                thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
                 thumbnails.append({
                     'height': int_or_none(thumbnail.get('height')),
-                    'url': thumbnail_url,
+                    'url': update_url(thumbnail_url, query=None, fragment=None),
                     'width': int_or_none(thumbnail.get('width')),
                 })
             if thumbnails:
@@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     sbr_tooltip = try_get(
                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                     if sbr_tooltip:
+                        # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
                         like_count, dislike_count = sbr_tooltip.split(' / ')
                         info.update({
                             'like_count': str_to_int(like_count),
@@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'tags': list,
             'view_count': int,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'categories': ['News & Politics'],
             'tags': list,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'categories': ['News & Politics'],
             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'skip_download': True,
@@ -3043,8 +3157,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _real_extract(self, url):
         item_id = self._match_id(url)
-        url = compat_urlparse.urlunparse(
-            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+        url = update_url(url, netloc='www.youtube.com')
         # Handle both video/playlist URLs
         qs = parse_qs(url)
         video_id = qs.get('v', [None])[0]
@@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor):
             'categories': ['Nonprofits & Activism'],
             'tags': list,
             'like_count': int,
-            'dislike_count': int,
         },
         'params': {
             'noplaylist': True,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e3c3ccff9..d5cc6386d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4121,6 +4121,17 @@ def update_url_query(url, query):
         query=compat_urllib_parse_urlencode(qs, True)))
 
 
+def update_url(url, **kwargs):
+    """Replace URL components specified by kwargs
+       url: compat_str or parsed URL tuple
+       returns: compat_str"""
+    if not kwargs:
+        return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
+    if not isinstance(url, tuple):
+        url = compat_urlparse.urlparse(url)
+    return compat_urlparse.urlunparse(url._replace(**kwargs))
+
+
 def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers = req.headers.copy()
     req_headers.update(headers)

From 30e986b83493f68bd4c2405b5f4d801891c9bdde Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 20 Jun 2022 23:15:20 +0100
Subject: [PATCH 172/312] [YouTube] Add `signatureTimestamp` for age-gate
 bypass

---
 youtube_dl/extractor/youtube.py | 34 +++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 65428528d..6c1cfe7f2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1642,6 +1642,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             fmt['url'] = compat_urlparse.urlunparse(
                 parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
+    # from yt-dlp, with tweaks
+    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
+        """
+        Extract signatureTimestamp (sts)
+        Required to tell API what sig/player version is in use.
+        """
+        sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
+        if not sts:
+            # Attempt to extract from player
+            if player_url is None:
+                error_msg = 'Cannot extract signature timestamp without player_url.'
+                if fatal:
+                    raise ExtractorError(error_msg)
+                self._downloader.report_warning(error_msg)
+                return
+            code = self._get_player_code(video_id, player_url)
+            sts = int_or_none(self._search_regex(
+                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
+                'JS player signature timestamp', group='sts', fatal=fatal))
+        return sts
+
     def _mark_watched(self, video_id, player_response):
         playback_url = url_or_none(try_get(
             player_response,
@@ -1766,6 +1787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
 
         player_response = None
+        player_url = None
         if webpage:
             player_response = self._extract_yt_initial_variable(
                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
@@ -1799,8 +1821,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
             # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
             pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+
+            # Use signatureTimestamp if available
+            # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
+            player_url = self._extract_player_url(webpage)
+            ytcfg = self._extract_ytcfg(video_id, webpage)
+            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
+            if sts:
+                pb_context['signatureTimestamp'] = sts
+
             query = {
-                'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}},
+                'playbackContext': {'contentPlaybackContext': pb_context},
                 'contentCheckOk': True,
                 'racyCheckOk': True,
                 'context': {
@@ -1901,7 +1932,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         formats = []
         itags = []
         itag_qualities = {}
-        player_url = None
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
         streaming_data = player_response.get('streamingData') or {}
         streaming_formats = streaming_data.get('formats') or []

From d6b14ba3163b255d0dd8d3b9ddf25d977b8262e7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 4 Feb 2023 23:18:24 +0000
Subject: [PATCH 173/312] [test] Fix TestAgeRestriction

* age restriction may cause DownloadError
* update obsolete test URLs
[skip ci]
---
 test/test_age_restriction.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 6f5513faa..db98494ab 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -11,6 +11,7 @@ from test.helper import try_rm
 
 
 from youtube_dl import YoutubeDL
+from youtube_dl.utils import DownloadError
 
 
 def _download_restricted(url, filename, age):
@@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
     ydl.add_default_info_extractors()
     json_filename = os.path.splitext(filename)[0] + '.info.json'
     try_rm(json_filename)
-    ydl.download([url])
+    try:
+        ydl.download([url])
+    except DownloadError:
+        try_rm(json_filename)
     res = os.path.exists(json_filename)
     try_rm(json_filename)
     return res
@@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
         self.assertFalse(_download_restricted(url, filename, age))
 
     def test_youtube(self):
-        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+        self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
 
     def test_youporn(self):
         self._assert_restricted(
-            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-            '505835.mp4', 2, old_age=25)
+            'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
+            '16715086.mp4', 2, old_age=25)
 
 
 if __name__ == '__main__':

From 249f2b631629471af5cfee2993e62de58c8f5990 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Feb 2023 15:43:43 +0000
Subject: [PATCH 174/312] [compat] Systematise compat_ naming

[skip ci]
---
 test/test_compat.py  |   3 +-
 youtube_dl/compat.py | 221 +++++++++++++++++++++++++++----------------
 2 files changed, 139 insertions(+), 85 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 0986cff37..4dddd9a38 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -48,7 +48,8 @@ class TestCompat(unittest.TestCase):
 
     def test_all_present(self):
         import youtube_dl.compat
-        all_names = youtube_dl.compat.__all__
+        all_names = sorted(
+            youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
         present_names = set(filter(
             lambda c: '_' in c and not c.startswith('_'),
             dir(youtube_dl.compat))) - set(['unicode_literals'])
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 28942a8c1..39551f810 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,10 @@ import subprocess
 import sys
 import xml.etree.ElementTree
 
+# naming convention
+# 'compat_' + Python3_name.replace('.', '_')
+# other aliases exist for convenience and/or legacy
+
 # deal with critical unicode/str things first
 try:
     # Python 2
@@ -28,6 +32,7 @@ try:
         unicode, basestring, unichr
     )
     from .casefold import casefold as compat_casefold
+
 except NameError:
     compat_str, compat_basestring, compat_chr = (
         str, str, chr
@@ -53,16 +58,15 @@ try:
     import urllib.parse as compat_urllib_parse
 except ImportError:  # Python 2
     import urllib as compat_urllib_parse
+    import urlparse as _urlparse
+    for a in dir(_urlparse):
+        if not hasattr(compat_urllib_parse, a):
+            setattr(compat_urllib_parse, a, getattr(_urlparse, a))
+    del _urlparse
 
-try:
-    from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError:  # Python 2
-    from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
-    import urllib.parse as compat_urlparse
-except ImportError:  # Python 2
-    import urlparse as compat_urlparse
+# unfavoured aliases
+compat_urlparse = compat_urllib_parse
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
 
 try:
     import urllib.response as compat_urllib_response
@@ -73,6 +77,7 @@ try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
     import cookielib as compat_cookiejar
+compat_http_cookiejar = compat_cookiejar
 
 if sys.version_info[0] == 2:
     class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
@@ -84,11 +89,13 @@ if sys.version_info[0] == 2:
             compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
 else:
     compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
 
 try:
     import http.cookies as compat_cookies
 except ImportError:  # Python 2
     import Cookie as compat_cookies
+compat_http_cookies = compat_cookies
 
 if sys.version_info[0] == 2:
     class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
@@ -98,6 +105,7 @@ if sys.version_info[0] == 2:
             return super(compat_cookies_SimpleCookie, self).load(rawdata)
 else:
     compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
 
 try:
     import html.entities as compat_html_entities
@@ -2351,16 +2359,19 @@ try:
     from urllib.error import HTTPError as compat_HTTPError
 except ImportError:  # Python 2
     from urllib2 import HTTPError as compat_HTTPError
+compat_urllib_HTTPError = compat_HTTPError
 
 try:
     from urllib.request import urlretrieve as compat_urlretrieve
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve
+compat_urllib_request_urlretrieve = compat_urlretrieve
 
 try:
     from html.parser import HTMLParser as compat_HTMLParser
 except ImportError:  # Python 2
     from HTMLParser import HTMLParser as compat_HTMLParser
+compat_html_parser_HTMLParser = compat_HTMLParser
 
 try:  # Python 2
     from HTMLParser import HTMLParseError as compat_HTMLParseError
@@ -2374,6 +2385,7 @@ except ImportError:  # Python <3.4
         # and uniform cross-version exception handling
         class compat_HTMLParseError(Exception):
             pass
+compat_html_parser_HTMLParseError = compat_HTMLParseError
 
 try:
     from subprocess import DEVNULL
@@ -2390,6 +2402,8 @@ try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+    from urllib.parse import urlencode as compat_urllib_parse_urlencode
+    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                 else re.compile(r'([\x00-\x7f]+)'))
@@ -2456,9 +2470,6 @@ except ImportError:  # Python 2
         string = string.replace('+', ' ')
         return compat_urllib_parse_unquote(string, encoding, errors)
 
-try:
-    from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError:  # Python 2
     # Python 2 will choke in urlencode on mixture of byte and unicode strings.
     # Possible solutions are to either port it from python 3 with all
     # the friends or manually ensure input query contains only byte strings.
@@ -2480,7 +2491,62 @@ except ImportError:  # Python 2
         def encode_list(l):
             return [encode_elem(e) for e in l]
 
-        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
+
+    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+    # Python 2's version is apparently totally broken
+    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                   encoding='utf-8', errors='replace'):
+        qs, _coerce_result = qs, compat_str
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+        r = []
+        for name_value in pairs:
+            if not name_value and not strict_parsing:
+                continue
+            nv = name_value.split('=', 1)
+            if len(nv) != 2:
+                if strict_parsing:
+                    raise ValueError('bad query field: %r' % (name_value,))
+                # Handle case of a control-name with no equal sign
+                if keep_blank_values:
+                    nv.append('')
+                else:
+                    continue
+            if len(nv[1]) or keep_blank_values:
+                name = nv[0].replace('+', ' ')
+                name = compat_urllib_parse_unquote(
+                    name, encoding=encoding, errors=errors)
+                name = _coerce_result(name)
+                value = nv[1].replace('+', ' ')
+                value = compat_urllib_parse_unquote(
+                    value, encoding=encoding, errors=errors)
+                value = _coerce_result(value)
+                r.append((name, value))
+        return r
+
+    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                        encoding='utf-8', errors='replace'):
+        parsed_result = {}
+        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                           encoding=encoding, errors=errors)
+        for name, value in pairs:
+            if name in parsed_result:
+                parsed_result[name].append(value)
+            else:
+                parsed_result[name] = [value]
+        return parsed_result
+
+    setattr(compat_urllib_parse, '_urlencode',
+            getattr(compat_urllib_parse, 'urlencode'))
+    for name, fix in (
+            ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
+            ('parse_unquote', compat_urllib_parse_unquote),
+            ('unquote_plus', compat_urllib_parse_unquote_plus),
+            ('urlencode', compat_urllib_parse_urlencode),
+            ('parse_qs', compat_parse_qs)):
+        setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs
 
 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler
@@ -2520,6 +2586,7 @@ try:
     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
+compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
 
 etree = xml.etree.ElementTree
 
@@ -2533,10 +2600,11 @@ try:
     # xml.etree.ElementTree.Element is a method in Python <=2.6 and
     # the following will crash with:
     #  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
-    isinstance(None, xml.etree.ElementTree.Element)
+    isinstance(None, etree.Element)
     from xml.etree.ElementTree import Element as compat_etree_Element
 except TypeError:  # Python <=2.6
     from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+compat_xml_etree_ElementTree_Element = compat_etree_Element
 
 if sys.version_info[0] >= 3:
     def compat_etree_fromstring(text):
@@ -2592,6 +2660,7 @@ else:
             if k == uri or v == prefix:
                 del etree._namespace_map[k]
         etree._namespace_map[uri] = prefix
+compat_xml_etree_register_namespace = compat_etree_register_namespace
 
 if sys.version_info < (2, 7):
     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
@@ -2603,53 +2672,6 @@ if sys.version_info < (2, 7):
 else:
     compat_xpath = lambda xpath: xpath
 
-try:
-    from urllib.parse import parse_qs as compat_parse_qs
-except ImportError:  # Python 2
-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
-
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, compat_str
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError('bad query field: %r' % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
-
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
-
 
 compat_os_name = os._name if os.name == 'java' else os.name
 
@@ -2774,6 +2796,8 @@ else:
     else:
         compat_expanduser = os.path.expanduser
 
+compat_os_path_expanduser = compat_expanduser
+
 
 if compat_os_name == 'nt' and sys.version_info < (3, 8):
     # os.path.realpath on Windows does not follow symbolic links
@@ -2785,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
 else:
     compat_realpath = os.path.realpath
 
+compat_os_path_realpath = compat_realpath
+
 
 if sys.version_info < (3, 0):
     def compat_print(s):
@@ -2805,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
     compat_getpass = getpass.getpass
 
+compat_getpass_getpass = compat_getpass
+
+
 try:
     compat_input = raw_input
 except NameError:  # Python 3
     compat_input = input
 
+
 # Python < 2.6.5 require kwargs to be bytes
 try:
     def _testfunc(x):
@@ -2915,15 +2945,16 @@ else:
                 lines = _lines
         return _terminal_size(columns, lines)
 
+
 try:
     itertools.count(start=0, step=1)
     compat_itertools_count = itertools.count
 except TypeError:  # Python 2.6
     def compat_itertools_count(start=0, step=1):
-        n = start
         while True:
-            yield n
-            n += step
+            yield start
+            start += step
+
 
 if sys.version_info >= (3, 0):
     from tokenize import tokenize as compat_tokenize_tokenize
@@ -3075,6 +3106,8 @@ if sys.version_info < (3, 3):
 else:
     compat_b64decode = base64.b64decode
 
+compat_base64_b64decode = compat_b64decode
+
 
 if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
     # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
@@ -3094,30 +3127,53 @@ else:
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
 
-__all__ = [
+legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
-    'compat_Struct',
     'compat_b64decode',
+    'compat_cookiejar',
+    'compat_cookiejar_Cookie',
+    'compat_cookies',
+    'compat_cookies_SimpleCookie',
+    'compat_etree_Element',
+    'compat_etree_register_namespace',
+    'compat_expanduser',
+    'compat_getpass',
+    'compat_parse_qs',
+    'compat_realpath',
+    'compat_urllib_parse_parse_qs',
+    'compat_urllib_parse_unquote',
+    'compat_urllib_parse_unquote_plus',
+    'compat_urllib_parse_unquote_to_bytes',
+    'compat_urllib_parse_urlencode',
+    'compat_urllib_parse_urlparse',
+    'compat_urlparse',
+    'compat_urlretrieve',
+    'compat_xml_parse_error',
+]
+
+
+__all__ = [
+    'compat_html_parser_HTMLParseError',
+    'compat_html_parser_HTMLParser',
+    'compat_Struct',
+    'compat_base64_b64decode',
     'compat_basestring',
     'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
     'compat_collections_chain_map',
-    'compat_cookiejar',
-    'compat_cookiejar_Cookie',
-    'compat_cookies',
-    'compat_cookies_SimpleCookie',
+    'compat_http_cookiejar',
+    'compat_http_cookiejar_Cookie',
+    'compat_http_cookies',
+    'compat_http_cookies_SimpleCookie',
     'compat_ctypes_WINFUNCTYPE',
-    'compat_etree_Element',
     'compat_etree_fromstring',
-    'compat_etree_register_namespace',
-    'compat_expanduser',
     'compat_filter',
     'compat_get_terminal_size',
     'compat_getenv',
-    'compat_getpass',
+    'compat_getpass_getpass',
     'compat_html_entities',
     'compat_html_entities_html5',
     'compat_http_client',
@@ -3131,11 +3187,11 @@ __all__ = [
     'compat_numeric_types',
     'compat_ord',
     'compat_os_name',
-    'compat_parse_qs',
+    'compat_os_path_expanduser',
+    'compat_os_path_realpath',
     'compat_print',
     'compat_re_Match',
     'compat_re_Pattern',
-    'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',
@@ -3147,17 +3203,14 @@ __all__ = [
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
-    'compat_urllib_parse_unquote',
-    'compat_urllib_parse_unquote_plus',
-    'compat_urllib_parse_unquote_to_bytes',
-    'compat_urllib_parse_urlencode',
-    'compat_urllib_parse_urlparse',
     'compat_urllib_request',
     'compat_urllib_request_DataHandler',
     'compat_urllib_response',
-    'compat_urlparse',
-    'compat_urlretrieve',
-    'compat_xml_parse_error',
+    'compat_urllib_request_urlretrieve',
+    'compat_urllib_HTTPError',
+    'compat_xml_etree_ElementTree_Element',
+    'compat_xml_etree_ElementTree_ParseError',
+    'compat_xml_etree_register_namespace',
     'compat_xpath',
     'compat_zip',
     'workaround_optparse_bug9161',

From 90c9f789d94fc2c0b4c28c57ba2e0b2f09ef95e3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Feb 2023 13:46:43 +0000
Subject: [PATCH 175/312] [utils] Add parse_qs, update_url

[skip ci]
---
 youtube_dl/utils.py | 64 ++++++++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d5cc6386d..4edbfa27b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -42,6 +42,7 @@ from .compat import (
     compat_HTMLParser,
     compat_HTTPError,
     compat_basestring,
+    compat_casefold,
     compat_chr,
     compat_collections_abc,
     compat_cookiejar,
@@ -54,18 +55,18 @@ from .compat import (
     compat_integer_types,
     compat_kwargs,
     compat_os_name,
-    compat_parse_qs,
+    compat_re_Match,
     compat_shlex_quote,
     compat_str,
     compat_struct_pack,
     compat_struct_unpack,
     compat_urllib_error,
     compat_urllib_parse,
+    compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
     compat_urllib_parse_unquote_plus,
     compat_urllib_request,
-    compat_urlparse,
     compat_xpath,
 )
 
@@ -80,12 +81,12 @@ def register_socks_protocols():
     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
-        if scheme not in compat_urlparse.uses_netloc:
-            compat_urlparse.uses_netloc.append(scheme)
+        if scheme not in compat_urllib_parse.uses_netloc:
+            compat_urllib_parse.uses_netloc.append(scheme)
 
 
-# This is not clearly defined otherwise
-compiled_regex_type = type(re.compile(''))
+# Unfavoured alias
+compiled_regex_type = compat_re_Match
 
 
 def random_user_agent():
@@ -2725,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy):
     assert issubclass(base_class, (
         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
 
-    url_components = compat_urlparse.urlparse(socks_proxy)
+    url_components = compat_urllib_parse.urlparse(socks_proxy)
     if url_components.scheme.lower() == 'socks5':
         socks_type = ProxyType.SOCKS5
     elif url_components.scheme.lower() in ('socks', 'socks4'):
@@ -3673,7 +3674,7 @@ def remove_quotes(s):
 
 
 def url_basename(url):
-    path = compat_urlparse.urlparse(url).path
+    path = compat_urllib_parse.urlparse(url).path
     return path.strip('/').split('/')[-1]
 
 
@@ -3693,7 +3694,7 @@ def urljoin(base, path):
     if not isinstance(base, compat_str) or not re.match(
             r'^(?:https?:)?//', base):
         return None
-    return compat_urlparse.urljoin(base, path)
+    return compat_urllib_parse.urljoin(base, path)
 
 
 class HEADRequest(compat_urllib_request.Request):
@@ -4091,6 +4092,10 @@ def escape_url(url):
     ).geturl()
 
 
+def parse_qs(url):
+    return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
+
+
 def read_batch_urls(batch_fd):
     def fixup(url):
         if not isinstance(url, compat_str):
@@ -4111,25 +4116,28 @@ def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
 
 
-def update_url_query(url, query):
-    if not query:
-        return url
-    parsed_url = compat_urlparse.urlparse(url)
-    qs = compat_parse_qs(parsed_url.query)
-    qs.update(query)
-    return compat_urlparse.urlunparse(parsed_url._replace(
-        query=compat_urllib_parse_urlencode(qs, True)))
-
-
 def update_url(url, **kwargs):
     """Replace URL components specified by kwargs
        url: compat_str or parsed URL tuple
-       returns: compat_str"""
+       if query_update is in kwargs, update query with
+       its value instead of replacing (overrides any `query`)
+       returns: compat_str
+    """
     if not kwargs:
-        return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url
+        return compat_urllib_parse.urlunparse(url) if isinstance(url, tuple) else url
     if not isinstance(url, tuple):
-        url = compat_urlparse.urlparse(url)
-    return compat_urlparse.urlunparse(url._replace(**kwargs))
+        url = compat_urllib_parse.urlparse(url)
+    query = kwargs.pop('query_update', None)
+    if query:
+        qs = compat_parse_qs(url.query)
+        qs.update(query)
+        kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
+        kwargs = compat_kwargs(kwargs)
+    return compat_urllib_parse.urlunparse(url._replace(**kwargs))
+
+
+def update_url_query(url, query):
+    return update_url(url, query_update=query)
 
 
 def update_Request(req, url=None, data=None, headers={}, query={}):
@@ -5597,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 
         if proxy == '__noproxy__':
             return None  # No Proxy
-        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
             req.add_header('Ytdl-socks-proxy', proxy)
             # youtube-dl's http/https handlers do wrapping the socket with socks
             return None
@@ -6035,14 +6043,6 @@ def traverse_obj(obj, *paths, **kwargs):
     str = compat_str
 
     is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
-    # stand-in until compat_re_Match is added
-    compat_re_Match = type(re.match('a', 'a'))
-    # stand-in until casefold.py is added
-    try:
-        ''.casefold()
-        compat_casefold = lambda s: s.casefold()
-    except AttributeError:
-        compat_casefold = lambda s: s.lower()
     casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
 
     if isinstance(expected_type, type):

From 4e04f104994c5dac2cb74b64ba7725716ce939d7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Feb 2023 15:50:28 +0000
Subject: [PATCH 176/312] [compat] Update test_compat

[skip ci]
---
 test/test_compat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 4dddd9a38..e233b1ae1 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -50,9 +50,9 @@ class TestCompat(unittest.TestCase):
         import youtube_dl.compat
         all_names = sorted(
             youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
-        present_names = set(filter(
+        present_names = set(map(compat_str, filter(
             lambda c: '_' in c and not c.startswith('_'),
-            dir(youtube_dl.compat))) - set(['unicode_literals'])
+            dir(youtube_dl.compat)))) - set(['unicode_literals'])
         self.assertEqual(all_names, sorted(present_names))
 
     def test_compat_urllib_parse_unquote(self):

From bafb6dec72865cc494feb35ecc94481c30a81069 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Feb 2023 16:19:21 +0000
Subject: [PATCH 177/312] [YouTube] Refresh compat/utils usage * import
 parse_qs() * import parse_qs in lazy_extractors (clears old TODO) * clean up
 old compiled lazy_extractors for Py2 * use update_url()

---
 devscripts/make_lazy_extractors.py | 10 ++++-
 test/test_execution.py             | 12 +++---
 youtube_dl/extractor/youtube.py    | 61 +++++++++++-------------------
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 878ae72b1..edc19183d 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 lazy_extractors_filename = sys.argv[1]
 if os.path.exists(lazy_extractors_filename):
     os.remove(lazy_extractors_filename)
+# Py2: may be confused by leftover lazy_extractors.pyc
+try:
+    os.remove(lazy_extractors_filename + 'c')
+except OSError:
+    pass
 
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
 
 module_contents = [
     module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
-    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
+    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
+    # needed for suitable() methods of Youtube extractor (see #28780)
+    'from youtube_dl.utils import parse_qs\n',
+]
 
 ie_template = '''
 class {name}({bases}):
diff --git a/test/test_execution.py b/test/test_execution.py
index 32948d93e..704e14612 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -40,14 +40,16 @@ class TestExecution(unittest.TestCase):
         self.assertFalse(stderr)
 
     def test_lazy_extractors(self):
+        lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
         try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
             subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
         finally:
-            try:
-                os.remove('youtube_dl/extractor/lazy_extractors.py')
-            except (IOError, OSError):
-                pass
+            for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
+                try:
+                    os.remove(lazy_extractors + x)
+                except (IOError, OSError):
+                    pass
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6c1cfe7f2..6c70a98d1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,12 +14,11 @@ from ..compat import (
     compat_chr,
     compat_HTTPError,
     compat_map as map,
-    compat_parse_qs,
     compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_unquote_plus,
-    compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
-    compat_urlparse,
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
@@ -33,6 +32,7 @@ from ..utils import (
     mimetype2ext,
     parse_codecs,
     parse_duration,
+    parse_qs,
     qualities,
     remove_start,
     smuggle_url,
@@ -50,10 +50,6 @@ from ..utils import (
 )
 
 
-def parse_qs(url):
-    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-
-
 class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@@ -636,6 +632,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
+                'uploader_id': 'WitcherGame',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
                 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
@@ -671,7 +669,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
-            'note': 'Age-gated video embedable only with clientScreen=EMBED',
+            'note': 'Age-gated video embeddable only with clientScreen=EMBED',
             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
             'info_dict': {
                 'id': 'Tq92D6wQ1mg',
@@ -1392,11 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     @classmethod
     def suitable(cls, url):
-        # Hack for lazy extractors until more generic solution is implemented
-        # (see #28780)
-        from .youtube import parse_qs
-        qs = parse_qs(url)
-        if qs.get('list', [None])[0]:
+        if parse_qs(url).get('list', [None])[0]:
             return False
         return super(YoutubeIE, cls).suitable(url)
 
@@ -1546,7 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if player_url.startswith('//'):
             player_url = 'https:' + player_url
         elif not re.match(r'https?://', player_url):
-            player_url = compat_urlparse.urljoin(
+            player_url = compat_urllib_parse.urljoin(
                 'https://www.youtube.com', player_url)
         return player_url
 
@@ -1628,9 +1622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _unthrottle_format_urls(self, video_id, player_url, formats):
         for fmt in formats:
-            parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
-            qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
-            n_param = qs.get('n')
+            parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
+            n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
             if not n_param:
                 continue
             n_param = n_param[-1]
@@ -1638,9 +1631,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if n_response is None:
                 # give up if descrambling failed
                 break
-            qs['n'] = [n_response]
-            fmt['url'] = compat_urlparse.urlunparse(
-                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+            fmt['url'] = update_url(
+                parsed_fmt_url, query_update={'n': [n_response]})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -1669,20 +1661,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
         if not playback_url:
             return
-        parsed_playback_url = compat_urlparse.urlparse(playback_url)
-        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
 
         # cpn generation algorithm is reverse engineered from base.js.
         # In fact it works even with dummy cpn.
         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
 
-        qs.update({
-            'ver': ['2'],
-            'cpn': [cpn],
-        })
-        playback_url = compat_urlparse.urlunparse(
-            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+        playback_url = update_url(
+            playback_url, query_update={
+                'ver': ['2'],
+                'cpn': [cpn],
+            })
 
         self._download_webpage(
             playback_url, video_id, 'Marking watched',
@@ -2075,9 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         thumbnails = []
         for container in (video_details, microformat):
-            for thumbnail in (try_get(
+            for thumbnail in try_get(
                     container,
-                    lambda x: x['thumbnail']['thumbnails'], list) or []):
+                    lambda x: x['thumbnail']['thumbnails'], list) or []:
                 thumbnail_url = url_or_none(thumbnail.get('url'))
                 if not thumbnail_url:
                     continue
@@ -3287,11 +3276,7 @@ class YoutubePlaylistIE(InfoExtractor):
     def suitable(cls, url):
         if YoutubeTabIE.suitable(url):
             return False
-        # Hack for lazy extractors until more generic solution is implemented
-        # (see #28780)
-        from .youtube import parse_qs
-        qs = parse_qs(url)
-        if qs.get('v', [None])[0]:
+        if parse_qs(url).get('v', [None])[0]:
             return False
         return super(YoutubePlaylistIE, cls).suitable(url)
 
@@ -3430,9 +3415,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
     }]
 
     def _real_extract(self, url):
-        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-        query = (qs.get('search_query') or qs.get('q'))[0]
-        params = qs.get('sp', ('',))[0]
+        qs = parse_qs(url)
+        query = (qs.get('search_query') or qs.get('q'))[-1]
+        params = qs.get('sp', ('',))[-1]
         return self.playlist_result(self._search_results(query, params), query, query)
 
 

From e8198c517b70301dd5a459927b5d5976304d6482 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 8 Feb 2023 18:16:51 +0000
Subject: [PATCH 178/312] [YouTube] Fix tests

---
 youtube_dl/extractor/youtube.py | 55 ++++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6c70a98d1..ba0f5c8b6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -27,6 +27,8 @@ from ..utils import (
     dict_get,
     error_to_compat_str,
     float_or_none,
+    extract_attributes,
+    get_element_by_attribute,
     int_or_none,
     js_to_json,
     mimetype2ext,
@@ -38,6 +40,7 @@ from ..utils import (
     smuggle_url,
     str_or_none,
     str_to_int,
+    traverse_obj,
     try_get,
     unescapeHTML,
     unified_strdate,
@@ -656,6 +659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
                 'duration': 177,
                 'uploader': 'FlyingKitty',
+                'uploader_id': 'FlyingKitty900',
                 'upload_date': '20200408',
                 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
                 'age_limit': 18,
@@ -678,6 +682,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'duration': 106,
                 'uploader': 'Projekt Melody',
+                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                 'upload_date': '20191227',
                 'age_limit': 18,
                 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
@@ -929,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'lsguqyKfVQg',
                 'ext': 'mp4',
                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
-                'alt_title': 'Dark Walk - Position Music',
+                'alt_title': 'Dark Walk',
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
                 'uploader_id': 'IronSoulElf',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
-                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
-                'track': 'Dark Walk - Position Music',
-                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
+                'track': 'Dark Walk',
+                'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
             },
             'params': {
@@ -2091,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
         is_live = video_details.get('isLive')
-        owner_profile_url = microformat.get('ownerProfileUrl')
+
+        def gen_owner_profile_url():
+            yield microformat.get('ownerProfileUrl')
+            yield extract_attributes(self._search_regex(
+                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
+                get_element_by_attribute('itemprop', 'author', webpage),
+                'owner_profile_url', default='')).get('href')
+
+        owner_profile_url = next(
+            (x for x in map(url_or_none, gen_owner_profile_url()) if x),
+            None)
 
         if not player_url:
             player_url = self._extract_player_url(webpage)
@@ -2176,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         info[d_k] = parse_duration(query[k][0])
 
         if video_description:
+            # Youtube Music Auto-generated description
             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
             if mobj:
                 release_year = mobj.group('release_year')
@@ -2250,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                             info['location'] = stl
                         else:
-                            mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+                            # •? doesn't match, but [•]? does; \xa0 = non-breaking space
+                            mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
                             if mobj:
                                 info.update({
                                     'series': mobj.group(1),
@@ -2261,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             vpir,
                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                             list) or []):
-                        tbr = tlb.get('toggleButtonRenderer') or {}
+                        tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
                         for getter, regex in [(
                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
@@ -2315,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             elif mrr_title == 'Song':
                                 info['track'] = mrr_contents_text
 
+            # this is not extraction but spelunking!
+            carousel_lockups = traverse_obj(
+                initial_data,
+                ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
+                 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
+                 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
+                expected_type=dict) or []
+            # try to reproduce logic from metadataRowContainerRenderer above (if it still is)
+            fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
+            # multiple_songs ?
+            if len(carousel_lockups) > 1:
+                fields = fields[-1:]
+            for info_row in traverse_obj(
+                    carousel_lockups,
+                    (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
+                    expected_type=dict):
+                row_title = traverse_obj(info_row, ('title', 'simpleText'))
+                row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
+                if not row_text:
+                    continue
+                for name, field in fields:
+                    if name == row_title and not info.get(field):
+                        info[field] = row_text
+
         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
             v = info.get(s_k)
             if v:

From f33923cba7670ea2e82f233c1f88210eb41f7c3b Mon Sep 17 00:00:00 2001
From: Valentin Metz <31850924+Valentin-Metz@users.noreply.github.com>
Date: Thu, 9 Feb 2023 12:25:28 +0100
Subject: [PATCH 179/312] [rbgtum] Add new extractor (#31305)

* [rbgtum] Add new extractor

* Small update, force CI

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  4 ++
 youtube_dl/extractor/rbgtum.py     | 97 ++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 youtube_dl/extractor/rbgtum.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 96b27b179..dfaef0cc3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1010,6 +1010,10 @@ from .raywenderlich import (
     RayWenderlichIE,
     RayWenderlichCourseIE,
 )
+from .rbgtum import (
+    RbgTumIE,
+    RbgTumCourseIE,
+)
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import (
diff --git a/youtube_dl/extractor/rbgtum.py b/youtube_dl/extractor/rbgtum.py
new file mode 100644
index 000000000..da48ebbc4
--- /dev/null
+++ b/youtube_dl/extractor/rbgtum.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RbgTumIE(InfoExtractor):
+    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
+    _TESTS = [{
+        # Combined view
+        'url': 'https://live.rbg.tum.de/w/cpp/22128',
+        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
+        'info_dict': {
+            'id': 'cpp/22128',
+            'ext': 'mp4',
+            'title': 'Lecture: October 18. 2022',
+            'series': 'Concepts of C++ programming (IN2377)',
+        }
+    }, {
+        # Presentation only
+        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
+        'md5': '36c584272179f3e56b0db5d880639cba',
+        'info_dict': {
+            'id': 'I2DL/12349/PRES',
+            'ext': 'mp4',
+            'title': 'Lecture 3: Introduction to Neural Networks',
+            'series': 'Introduction to Deep Learning (IN2346)',
+        }
+    }, {
+        # Camera only
+        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
+        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
+        'info_dict': {
+            'id': 'fvv-info/16130/CAM',
+            'ext': 'mp4',
+            'title': 'Fachschaftsvollversammlung',
+            'series': 'Fachschaftsvollversammlung Informatik',
+        }
+    }, ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
+        lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+        lecture_series_title = self._html_search_regex(
+            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
+
+        formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': lecture_title,
+            'series': lecture_series_title,
+            'formats': formats,
+        }
+
+
+class RbgTumCourseIE(InfoExtractor):
+    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
+    _TESTS = [{
+        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
+        'info_dict': {
+            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
+            'id': '2022/S/fpv',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 13,
+    }, {
+        'url': 'https://live.rbg.tum.de/course/2022/W/set',
+        'info_dict': {
+            'title': 'SET FSMPIC',
+            'id': '2022/W/set',
+        },
+        'params': {
+            'noplaylist': False,
+        },
+        'playlist_count': 6,
+    }, ]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+        webpage = self._download_webpage(url, course_id)
+
+        lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
+
+        lecture_urls = []
+        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
+            lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))
+
+        return self.playlist_result(lecture_urls, course_id, lecture_series_title)

From 33db85c571304bbd6863e3407ad8d08764c9e53b Mon Sep 17 00:00:00 2001
From: teddy171 <teddy171@qq.com>
Date: Fri, 10 Feb 2023 04:19:27 +0800
Subject: [PATCH 180/312] [feat]: Add support to external downloader aria2p
 (#31500)

* feat: add class Aria2pFD

* feat: create call_downloader function

* feat: a colorful download interface to aria2pFD

* feat: change value name

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Typo in suggestion

* fix: remove unused value

* fix: add not function to return value(0 is normal); add total_seconds to download.eta(timedelta object); add waiting status when hook progress

* fix: remove unuse method ..utils.format_bytes

* fix: be up to flake8

* fix: be up to flake8

* Apply suggestions from code review

* [feat] test external downloader aria2p

* [feat] test external downloader aria2p

* [fix] test_external_downloader.py

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update youtube_dl/downloader/external.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* refactoring code and fix bugs

* Apply suggestions from code review

* Rename test_external_downloader.py to test_downloader_external.py

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 test/helper.py                    |  11 +++
 test/test_downloader_external.py  | 115 ++++++++++++++++++++++++++++++
 test/test_downloader_http.py      |  17 ++---
 test/test_http.py                 |  16 ++---
 youtube_dl/downloader/external.py |  58 +++++++++++++++
 5 files changed, 193 insertions(+), 24 deletions(-)
 create mode 100644 test/test_downloader_external.py

diff --git a/test/helper.py b/test/helper.py
index c6a2f0667..883b2e877 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -89,6 +89,17 @@ class FakeYDL(YoutubeDL):
         self.report_warning = types.MethodType(report_warning, self)
 
 
+class FakeLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        pass
+
+
 def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
         for tc in ie.get_testcases(include_onlymatching):
diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
new file mode 100644
index 000000000..c0239502b
--- /dev/null
+++ b/test/test_downloader_external.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import subprocess
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+    try_rm,
+)
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_http_server
+from youtube_dl.utils import encodeFilename
+from youtube_dl.downloader.external import Aria2pFD
+import threading
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+TEST_SIZE = 10 * 1024
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def send_content_range(self, total=None):
+        range_header = self.headers.get('Range')
+        start = end = None
+        if range_header:
+            mobj = re.match(r'bytes=(\d+)-(\d+)', range_header)
+            if mobj:
+                start, end = (int(mobj.group(i)) for i in (1, 2))
+        valid_range = start is not None and end is not None
+        if valid_range:
+            content_range = 'bytes %d-%d' % (start, end)
+            if total:
+                content_range += '/%d' % total
+            self.send_header('Content-Range', content_range)
+        return (end - start + 1) if valid_range else total
+
+    def serve(self, range=True, content_length=True):
+        self.send_response(200)
+        self.send_header('Content-Type', 'video/mp4')
+        size = TEST_SIZE
+        if range:
+            size = self.send_content_range(TEST_SIZE)
+        if content_length:
+            self.send_header('Content-Length', size)
+        self.end_headers()
+        self.wfile.write(b'#' * size)
+
+    def do_GET(self):
+        if self.path == '/regular':
+            self.serve()
+        elif self.path == '/no-content-length':
+            self.serve(content_length=False)
+        elif self.path == '/no-range':
+            self.serve(range=False)
+        elif self.path == '/no-range-no-content-length':
+            self.serve(range=False, content_length=False)
+        else:
+            assert False, 'unrecognised server path'
+
+
+@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
+class TestAria2pFD(unittest.TestCase):
+    def setUp(self):
+        self.httpd = compat_http_server.HTTPServer(
+            ('127.0.0.1', 0), HTTPTestRequestHandler)
+        self.port = http_server_port(self.httpd)
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+    def download(self, params, ep):
+        with subprocess.Popen(
+            ['aria2c', '--enable-rpc'],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        ) as process:
+            if not process.poll():
+                filename = 'testfile.mp4'
+                params['logger'] = FakeLogger()
+                params['outtmpl'] = filename
+                ydl = YoutubeDL(params)
+                try_rm(encodeFilename(filename))
+                self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0)
+                self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+                try_rm(encodeFilename(filename))
+            process.kill()
+
+    def download_all(self, params):
+        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+            self.download(params, ep)
+
+    def test_regular(self):
+        self.download_all({'external_downloader': 'aria2p'})
+
+    def test_chunked(self):
+        self.download_all({
+            'external_downloader': 'aria2p',
+            'http_chunk_size': 1000,
+        })
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 750472281..4e6d7a2a0 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,7 +9,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import http_server_port, try_rm
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+    try_rm,
+)
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
@@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False
 
 
-class FakeLogger(object):
-    def debug(self, msg):
-        pass
-
-    def warning(self, msg):
-        pass
-
-    def error(self, msg):
-        pass
-
-
 class TestHttpFD(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
diff --git a/test/test_http.py b/test/test_http.py
index 3ee0a5dda..487a9bc77 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,7 +8,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import http_server_port
+from test.helper import (
+    FakeLogger,
+    http_server_port,
+)
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
@@ -52,17 +55,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False
 
 
-class FakeLogger(object):
-    def debug(self, msg):
-        pass
-
-    def warning(self, msg):
-        pass
-
-    def error(self, msg):
-        pass
-
-
 class TestHTTP(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index a06ab2e50..bffcd10b6 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -200,6 +200,64 @@ class Aria2cFD(ExternalFD):
         return cmd
 
 
+class Aria2pFD(ExternalFD):
+    ''' Aria2pFD class
+    This class support to use aria2p as downloader.
+    (Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
+    through JSON-RPC.)
+    It can help you to get download progress more easily.
+    To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
+    Then run aria2c in the background and enable with the --enable-rpc option.
+    '''
+    try:
+        import aria2p
+        __avail = True
+    except ImportError:
+        __avail = False
+
+    @classmethod
+    def available(cls):
+        return cls.__avail
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        aria2 = self.aria2p.API(
+            self.aria2p.Client(
+                host='http://localhost',
+                port=6800,
+                secret=''
+            )
+        )
+
+        options = {
+            'min-split-size': '1M',
+            'max-connection-per-server': 4,
+            'auto-file-renaming': 'false',
+        }
+        options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
+        options['out'] = os.path.basename(tmpfilename)
+        options['header'] = []
+        for key, val in info_dict['http_headers'].items():
+            options['header'].append('{0}: {1}'.format(key, val))
+        download = aria2.add_uris([info_dict['url']], options)
+        status = {
+            'status': 'downloading',
+            'tmpfilename': tmpfilename,
+        }
+        started = time.time()
+        while download.status in ['active', 'waiting']:
+            download = aria2.get_download(download.gid)
+            status.update({
+                'downloaded_bytes': download.completed_length,
+                'total_bytes': download.total_length,
+                'elapsed': time.time() - started,
+                'eta': download.eta.total_seconds(),
+                'speed': download.download_speed,
+            })
+            self._hook_progress(status)
+            time.sleep(.5)
+        return download.status != 'complete'
+
+
 class HttpieFD(ExternalFD):
     @classmethod
     def available(cls):

From 822f19f05d0ab1a4a945a85f691f2079f7cb3bbb Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:37:45 +0100
Subject: [PATCH 181/312] [FileMoonIE] Add extractor for filemoon.sx (#31515)

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/filemoon.py   | 43 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 youtube_dl/extractor/filemoon.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index dfaef0cc3..f63a2e030 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -376,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .filemoon import FileMoonIE
 from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
diff --git a/youtube_dl/extractor/filemoon.py b/youtube_dl/extractor/filemoon.py
new file mode 100644
index 000000000..654df9b69
--- /dev/null
+++ b/youtube_dl/extractor/filemoon.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    js_to_json,
+)
+
+
+class FileMoonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
+        'md5': '5a713742f57ac4aef29b74733e8dda01',
+        'info_dict': {
+            'id': 'dw40rxrzruqz',
+            'title': 'dw40rxrzruqz',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
+        packed = matches[-1]
+        unpacked = decode_packed_codes(packed)
+        jwplayer_sources = self._parse_json(
+            self._search_regex(
+                r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
+            video_id, transform_source=js_to_json)
+
+        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
+
+        return {
+            'id': video_id,
+            'title': self._generic_title(url) or video_id,
+            'formats': formats
+        }

From de48105dd870e353af468bfb8d49b14d9894e649 Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:47:43 +0100
Subject: [PATCH 182/312] [KommunetvIE] Add extractor for kommunetv.no (#31516)

* Add extractor for kommunetv.no
* Using utils.update_url instead of regex

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/kommunetv.py  | 35 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 youtube_dl/extractor/kommunetv.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f63a2e030..d8428f46f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -557,6 +557,7 @@ from .khanacademy import (
 from .kickstarter import KickStarterIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
+from .kommunetv import KommunetvIE
 from .konserthusetplay import KonserthusetPlayIE
 from .krasview import KrasViewIE
 from .kth import KTHIE
diff --git a/youtube_dl/extractor/kommunetv.py b/youtube_dl/extractor/kommunetv.py
new file mode 100644
index 000000000..91d06a74f
--- /dev/null
+++ b/youtube_dl/extractor/kommunetv.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import update_url
+
+
+class KommunetvIE(InfoExtractor):
+    _VALID_URL = r'https://(\w+).kommunetv.no/archive/(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://oslo.kommunetv.no/archive/921',
+        'md5': '5f102be308ee759be1e12b63d5da4bbc',
+        'info_dict': {
+            'id': '921',
+            'title': 'Bystyremøte',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        headers = {
+            'Accept': 'application/json'
+        }
+        data = self._download_json('https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id, video_id, headers=headers)
+        title = data['stream']['title']
+        file = data['playlist'][0]['playlist'][0]['file']
+        url = update_url(file, query=None, fragment=None)
+        formats = self._extract_m3u8_formats(url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title
+        }

From 6f8c2635a573c84ef66c02f73b4aeff1cc36ae4e Mon Sep 17 00:00:00 2001
From: fonkap <fonk666@gmail.com>
Date: Sat, 11 Feb 2023 03:54:45 +0100
Subject: [PATCH 183/312] [StreamsbIE] Add extractor for streamsb.com
 (viewsb.com) (#31517)

* Add extractor for streamsb.com (viewsb.com)

* make data url using app.js version

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/streamsb.py   | 61 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 youtube_dl/extractor/streamsb.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d8428f46f..3a87f9e33 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1206,6 +1206,7 @@ from .storyfire import (
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streamsb import StreamsbIE
 from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stv import STVPlayerIE
diff --git a/youtube_dl/extractor/streamsb.py b/youtube_dl/extractor/streamsb.py
new file mode 100644
index 000000000..bffcb3de1
--- /dev/null
+++ b/youtube_dl/extractor/streamsb.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import binascii
+import random
+import re
+import string
+
+from .common import InfoExtractor
+from ..utils import urljoin, url_basename
+
+
+def to_ascii_hex(str1):
+    return binascii.hexlify(str1.encode('utf-8')).decode('ascii')
+
+
+def generate_random_string(length):
+    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
+
+
+class StreamsbIE(InfoExtractor):
+    _DOMAINS = ('viewsb.com', )
+    _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(_DOMAINS)
+    _TEST = {
+        'url': 'https://viewsb.com/dxfvlu4qanjx',
+        'md5': '488d111a63415369bf90ea83adc8a325',
+        'info_dict': {
+            'id': 'dxfvlu4qanjx',
+            'ext': 'mp4',
+            'title': 'Sintel'
+        }
+    }
+
+    def _real_extract(self, url):
+        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
+        webpage = self._download_webpage(url, video_id)
+
+        iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path')
+        iframe_url = urljoin('https://' + domain, iframe_rel_url)
+
+        iframe_data = self._download_webpage(iframe_url, video_id)
+        app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50'
+
+        video_code = url_basename(iframe_url).rsplit('.')[0]
+
+        length = 12
+        req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb'))
+        ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))
+
+        video_data = self._download_webpage(ereq, video_id, headers={
+            'Referer': iframe_url,
+            'watchsb': 'sbstream',
+        })
+        player_data = self._parse_json(video_data, video_id)
+        title = player_data['stream_data']['title']
+        formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+        }

From 42b098dd79e91295376ca98f394876555481a3eb Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Feb 2023 02:47:09 +0000
Subject: [PATCH 184/312] [InfoExtractor] Handle unquoted values in OpenGraph
 searches

---
 test/test_InfoExtractor.py     | 2 ++
 youtube_dl/extractor/common.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index dd69a681b..4db5c93f1 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -62,6 +62,7 @@ class TestInfoExtractor(unittest.TestCase):
             <meta name="og:test1" content='foo > < bar'/>
             <meta name="og:test2" content="foo >//< bar"/>
             <meta property=og-test3 content='Ill-formatted opengraph'/>
+            <meta property=og:test4 content=unquoted-value/>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -74,6 +75,7 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
 
     def test_html_search_meta(self):
         ie = self.ie
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a0a796d7b..7244e5df6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1087,7 +1087,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
         property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
                        % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'

From dd9aa74beefc179f943051c4e19eecad87ab1124 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Feb 2023 16:33:01 +0000
Subject: [PATCH 185/312] [test] Avoid name TestIE which causes a pytest
 warning

See: https://github.com/yt-dlp/yt-dlp/commit/060ac76257a8c1f7370a8a571821c1d73377701f
---
 test/test_InfoExtractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4db5c93f1..6d25441db 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -35,13 +35,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
             assert False
 
 
-class TestIE(InfoExtractor):
+class DummyIE(InfoExtractor):
     pass
 
 
 class TestInfoExtractor(unittest.TestCase):
     def setUp(self):
-        self.ie = TestIE(FakeYDL())
+        self.ie = DummyIE(FakeYDL())
 
     def test_ie_key(self):
         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)

From 2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 17 Feb 2023 11:16:54 +0000
Subject: [PATCH 186/312] [YouTube] Avoid crash if uploader_id extraction fails

See #31530.
---
 youtube_dl/extractor/youtube.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ba0f5c8b6..66b0257df 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2122,7 +2122,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 microformat.get('uploadDate')
                 or search_meta('uploadDate')),
             'uploader': video_details['author'],
-            'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
+            'uploader_id': self._search_regex(
+                r'/(?:channel|user)/([^/?&#]+)', owner_profile_url,
+                'uploader id', fatal=False) if owner_profile_url else None,
             'uploader_url': owner_profile_url,
             'channel_id': channel_id,
             'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,

From 57802e632f5a741df6fd9b30a455c32632944489 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Feb 2023 13:47:49 +0000
Subject: [PATCH 187/312] [jsinterp] Fix dict comprehension for Py2.6

Resolves #31600
---
 youtube_dl/jsinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 60fa2b1b9..a3bc42a61 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -262,7 +262,7 @@ class JSInterpreter(object):
         if not expr:
             return
         # collections.Counter() is ~10% slower in both 2.7 and 3.9
-        counters = {k: 0 for k in _MATCHING_PARENS.values()}
+        counters = dict((k, 0) for k in _MATCHING_PARENS.values())
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
         after_op, in_regex_char_group, skip_re = True, False, 0

From 6067451e432fb65d487a8a67bb5cff52efb9ccf4 Mon Sep 17 00:00:00 2001
From: df <fieldhouse@gmx.net>
Date: Mon, 20 Feb 2023 01:41:46 +0000
Subject: [PATCH 188/312] [Vimeo] Fix e19ec52 for tween-age Pythons

* a check in older Pythons in the 2.7 and earlier, 3.3, 3.4 series caused "sre_constants.error: nothing to repeat"
* satisfy the check by avoiding nested qualifiers that can match empty string

Resolves #31597
---
 youtube_dl/extractor/vimeo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 7f2731d83..8e1a805f6 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -286,7 +286,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                              /(?!videos|likes)[^/?#]+/?|
                              (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
                          )
-                         (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                         (?:(?(q)[&]|(?(u)|/?)[?]).+?)?(?:[#].*)?$
                  '''
     IE_NAME = 'vimeo'
     _TESTS = [

From 1d3751c3fe50b203d3e2bff71d866c8c500f8288 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 1 Jun 2021 18:05:41 +0530
Subject: [PATCH 189/312] Escape URLs in `sanitized_Request`, not
 `sanitize_url` d2558234cf5dd12d6896eed5427b7dcdb3ab7b5a added escaping of
 URLs while sanitizing. However, `sanitize_url` may not always receive an
 actual URL. Eg: When using `youtube-dl "search query" --default-search
 ytsearch`, `search query` gets escaped to `search%20query` before being
 prefixed with `ytsearch:` which is not the intended behavior. So the escaping
 is moved to `sanitized_Request` instead.

---
 test/test_utils.py              |  1 +
 youtube_dl/extractor/generic.py | 19 +++++++++++++++++++
 youtube_dl/utils.py             |  4 ++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 9d364c863..ea2b96ed2 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -250,6 +250,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
         self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
         self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('foo bar'), 'foo bar')
 
     def test_expand_path(self):
         def env(var):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 0e473e952..b01900afa 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2320,6 +2320,25 @@ class GenericIE(InfoExtractor):
                 'height': 720,
                 'age_limit': 18,
             },
+        }, {
+            # would like to use the yt-dl test video but searching for
+            # '"\'/\\ä↭𝕐' fails, so using an old vid from YouTube Korea
+            'note': 'Test default search',
+            'url': 'Shorts로 허락 필요없이 놀자! (BTS편)',
+            'info_dict': {
+                'id': 'usDGO4Zb-dc',
+                'ext': 'mp4',
+                'title': 'YouTube Shorts로 허락 필요없이 놀자! (BTS편)',
+                'description': 'md5:96e31607eba81ab441567b5e289f4716',
+                'upload_date': '20211107',
+                'uploader': 'YouTube Korea',
+                'location': '대한민국',
+            },
+            'params': {
+                'default_search': 'ytsearch',
+                'skip_download': True,
+            },
+            'expected_warnings': ['uploader id'],
         },
     ]
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4edbfa27b..761edcd49 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2176,11 +2176,11 @@ def sanitize_url(url):
     for mistake, fixup in COMMON_TYPOS:
         if re.match(mistake, url):
             return re.sub(mistake, fixup, url)
-    return escape_url(url)
+    return url
 
 
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+    return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs)
 
 
 def expand_path(s):

From e67e52a8f8fd7e76253e416da76570af8da200d0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 24 Feb 2023 02:32:40 +0000
Subject: [PATCH 190/312] [test] Support test-case with volatile ID (eg live
 show)

Signalled by regexp ID value, eg: `'id': r're:[\da-zA-Z_-]{8,}'`
---
 test/test_download.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_download.py b/test/test_download.py
index 19936969f..d50008307 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,6 +148,7 @@ def generator(test_case, tname):
                 try_rm(tc_filename)
                 try_rm(tc_filename + '.part')
                 try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
+
         try_rm_tcs_files()
         try:
             try_num = 1
@@ -213,7 +214,15 @@ def generator(test_case, tname):
                 # First, check test cases' data against extracted data alone
                 expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
                 # Now, check downloaded file consistency
+                # support test-case with volatile ID, signalled by regexp value
+                if tc.get('info_dict', {}).get('id', '').startswith('re:'):
+                    test_id = tc['info_dict']['id']
+                    tc['info_dict']['id'] = tc_res_dict['id']
+                else:
+                    test_id = None
                 tc_filename = get_tc_filename(tc)
+                if test_id:
+                    tc['info_dict']['id'] = test_id
                 if not test_case.get('params', {}).get('skip_download', False):
                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                     self.assertTrue(tc_filename in finished_hook_called)

From f7ce98a21e15cb094c772e9082796d009c61578b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 24 Feb 2023 02:48:37 +0000
Subject: [PATCH 191/312] [YouTube] Support @owner format in uploader_id etc

* implement https://github.com/ytdl-org/youtube-dl/issues/31530#issuecomment-1435734719
* update affected tests
* misc clean-ups
---
 youtube_dl/extractor/youtube.py | 319 +++++++++++++++++++-------------
 1 file changed, 194 insertions(+), 125 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 66b0257df..4246d84f9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     get_element_by_attribute,
     int_or_none,
     js_to_json,
+    merge_dicts,
     mimetype2ext,
     parse_codecs,
     parse_duration,
@@ -400,6 +401,62 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 break
             data['continuation'] = token
 
+    @staticmethod
+    def _owner_endpoints_path():
+        return [
+            Ellipsis,
+            lambda k, _: k.endswith('SecondaryInfoRenderer'),
+            ('owner', 'videoOwner'), 'videoOwnerRenderer', 'title',
+            'runs', Ellipsis]
+
+    def _extract_channel_id(self, webpage, videodetails={}, metadata={}, renderers=[]):
+        channel_id = None
+        if any((videodetails, metadata, renderers)):
+            channel_id = (
+                traverse_obj(videodetails, 'channelId')
+                or traverse_obj(metadata, 'externalChannelId', 'externalId')
+                or traverse_obj(renderers,
+                                self._owner_endpoints_path() + [
+                                    'navigationEndpoint', 'browseEndpoint', 'browseId'],
+                                get_all=False)
+            )
+        return channel_id or self._html_search_meta(
+            'channelId', webpage, 'channel id', default=None)
+
+    def _extract_author_var(self, webpage, var_name,
+                            videodetails={}, metadata={}, renderers=[]):
+        result = None
+        paths = {
+            #       (HTML, videodetails, metadata, renderers)
+            'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
+            'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
+                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
+        }
+        if any((videodetails, metadata, renderers)):
+            result = (
+                traverse_obj(videodetails, paths[var_name][1], get_all=False)
+                or traverse_obj(metadata, paths[var_name][2], get_all=False)
+                or traverse_obj(renderers,
+                                self._owner_endpoints_path() + paths[var_name][3],
+                                get_all=False)
+            )
+        return result or traverse_obj(
+            extract_attributes(self._search_regex(
+                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
+                % re.escape(var_name),
+                get_element_by_attribute('itemprop', 'author', webpage) or '',
+                'author link', default='')),
+            paths[var_name][0])
+
+    @staticmethod
+    def _yt_urljoin(url_or_path):
+        return urljoin('https://www.youtube.com', url_or_path)
+
+    def _extract_uploader_id(self, uploader_url):
+        return self._search_regex(
+            r'/(?:(?:channel|user)/|(?=@))([^/?&#]+)', uploader_url or '',
+            'uploader id', default=None)
+
 
 class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
@@ -516,8 +573,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
                 'channel': 'Philipp Hagemeister',
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
@@ -557,8 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                 'uploader': 'Philipp Hagemeister',
-                'uploader_id': 'phihag',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@PhilippHagemeister',
                 'upload_date': '20121002',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
@@ -588,7 +645,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'youtube_include_dash_manifest': True,
                 'format': '141',
             },
-            'skip': 'format 141 not served anymore',
+            'skip': 'format 141 not served any more',
         },
         # DASH manifest with encrypted signature
         {
@@ -600,7 +657,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                 'duration': 244,
                 'uploader': 'AfrojackVEVO',
-                'uploader_id': 'AfrojackVEVO',
+                'uploader_id': '@AfrojackVEVO',
                 'upload_date': '20131011',
                 'abr': 129.495,
             },
@@ -618,8 +675,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 219,
                 'upload_date': '20100909',
                 'uploader': 'Amazing Atheist',
-                'uploader_id': 'TheAmazingAtheist',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+                'uploader_id': '@theamazingatheist',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
@@ -635,8 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'duration': 142,
                 'uploader': 'The Witcher',
-                'uploader_id': 'WitcherGame',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+                'uploader_id': '@thewitcher',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@thewitcher',
                 'upload_date': '20140605',
                 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                 'age_limit': 18,
@@ -659,7 +716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
                 'duration': 177,
                 'uploader': 'FlyingKitty',
-                'uploader_id': 'FlyingKitty900',
+                'uploader_id': '@FlyingKitty900',
                 'upload_date': '20200408',
                 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
                 'age_limit': 18,
@@ -682,7 +739,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:17eccca93a786d51bc67646756894066',
                 'duration': 106,
                 'uploader': 'Projekt Melody',
-                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'uploader_id': '@ProjektMelody',
                 'upload_date': '20191227',
                 'age_limit': 18,
                 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
@@ -704,10 +761,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
                 'duration': 210,
-                'uploader': 'Herr Lurik',
-                'uploader_id': 'st3in234',
                 'upload_date': '20130730',
-                'uploader_url': 'http://www.youtube.com/user/st3in234',
+                'uploader': 'Herr Lurik',
+                'uploader_id': '@HerrLurik',
+                'uploader_url': 'http://www.youtube.com/@HerrLurik',
                 'age_limit': 0,
                 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
                 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
@@ -740,8 +797,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 266,
                 'upload_date': '20100430',
-                'uploader_id': 'deadmau5',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+                'uploader_id': '@deadmau5',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@deadmau5',
                 'creator': 'deadmau5',
                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
                 'uploader': 'deadmau5',
@@ -762,8 +819,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': r're:(?s)(?:.+\s)?HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
                 'duration': 6085,
                 'upload_date': '20150827',
-                'uploader_id': 'olympic',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+                'uploader_id': '@Olympics',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Olympics',
                 'uploader': r're:Olympics?',
                 'age_limit': 0,
                 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
@@ -785,8 +842,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'stretched_ratio': 16 / 9.,
                 'duration': 85,
                 'upload_date': '20110310',
-                'uploader_id': 'AllenMeow',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+                'uploader_id': '@AllenMeow',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                 'uploader': '孫ᄋᄅ',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
@@ -824,7 +881,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'dorappi2000',
                 'formats': 'mincount:31',
             },
-            'skip': 'not actual anymore',
+            'skip': 'not actual any more',
         },
         # DASH manifest with segment_list
         {
@@ -905,6 +962,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'Not multifeed any more',
         },
         {
             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -914,7 +972,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
             },
             'playlist_count': 2,
-            'skip': 'Not multifeed anymore',
+            'skip': 'Not multifeed any more',
         },
         {
             'url': 'https://vid.plus/FlRa-iH7PGw',
@@ -938,8 +996,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                 'duration': 133,
                 'upload_date': '20151119',
-                'uploader_id': 'IronSoulElf',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+                'uploader_id': '@IronSoulElf',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@IronSoulElf',
                 'uploader': 'IronSoulElf',
                 'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk',
@@ -987,8 +1045,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                 'duration': 721,
                 'upload_date': '20150127',
-                'uploader_id': 'BerkmanCenter',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+                'uploader_id': '@BKCHarvard',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BKCHarvard',
                 'uploader': 'The Berkman Klein Center for Internet & Society',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
             },
@@ -1007,8 +1065,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 4060,
                 'upload_date': '20151119',
                 'uploader': 'Bernie Sanders',
-                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+                'uploader_id': '@BernieSanders',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@BernieSanders',
                 'license': 'Creative Commons Attribution license (reuse allowed)',
             },
             'params': {
@@ -1054,8 +1112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'duration': 2085,
                 'upload_date': '20170118',
                 'uploader': 'Vsauce',
-                'uploader_id': 'Vsauce',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+                'uploader_id': '@Vsauce',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@Vsauce',
                 'series': 'Mind Field',
                 'season_number': 1,
                 'episode_number': 1,
@@ -1134,7 +1192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
                 'youtube_include_dash_manifest': False,
             },
-            'skip': 'not actual anymore',
+            'skip': 'not actual any more',
         },
         {
             # Youtube Music Auto-generated description
@@ -1191,8 +1249,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'IMG 3456',
                 'description': '',
                 'upload_date': '20170613',
-                'uploader_id': 'ElevageOrVert',
                 'uploader': 'ElevageOrVert',
+                'uploader_id': '@ElevageOrVert',
             },
             'params': {
                 'skip_download': True,
@@ -1210,8 +1268,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'Part 77   Sort a list of simple types in c#',
                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
                 'upload_date': '20130831',
-                'uploader_id': 'kudvenkat',
                 'uploader': 'kudvenkat',
+                'uploader_id': '@Csharp-video-tutorialsBlogspot',
             },
             'params': {
                 'skip_download': True,
@@ -1263,8 +1321,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
                 'upload_date': '20201120',
                 'uploader': 'Walk around Japan',
-                'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
+                'uploader_id': '@walkaroundjapan7124',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@walkaroundjapan7124',
             },
             'params': {
                 'skip_download': True,
@@ -1276,11 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'info_dict': {
                 'id': '4L2J27mJ3Dc',
                 'ext': 'mp4',
+                'title': 'Midwest Squid Game #Shorts',
+                'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
                 'upload_date': '20211025',
                 'uploader': 'Charlie Berens',
-                'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
-                'uploader_id': 'fivedlrmilkshake',
-                'title': 'Midwest Squid Game #Shorts',
+                'uploader_id': '@CharlieBerens',
             },
             'params': {
                 'skip_download': True,
@@ -2088,25 +2146,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 thumbnails = [{'url': thumbnail}]
 
         category = microformat.get('category') or search_meta('genre')
-        channel_id = video_details.get('channelId') \
-            or microformat.get('externalChannelId') \
-            or search_meta('channelId')
+        channel_id = self._extract_channel_id(
+            webpage, videodetails=video_details, metadata=microformat)
         duration = int_or_none(
             video_details.get('lengthSeconds')
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
         is_live = video_details.get('isLive')
 
-        def gen_owner_profile_url():
-            yield microformat.get('ownerProfileUrl')
-            yield extract_attributes(self._search_regex(
-                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
-                get_element_by_attribute('itemprop', 'author', webpage),
-                'owner_profile_url', default='')).get('href')
+        owner_profile_url = self._yt_urljoin(self._extract_author_var(
+            webpage, 'url', videodetails=video_details, metadata=microformat))
 
-        owner_profile_url = next(
-            (x for x in map(url_or_none, gen_owner_profile_url()) if x),
-            None)
+        uploader = self._extract_author_var(
+            webpage, 'name', videodetails=video_details, metadata=microformat)
 
         if not player_url:
             player_url = self._extract_player_url(webpage)
@@ -2121,13 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'upload_date': unified_strdate(
                 microformat.get('uploadDate')
                 or search_meta('uploadDate')),
-            'uploader': video_details['author'],
-            'uploader_id': self._search_regex(
-                r'/(?:channel|user)/([^/?&#]+)', owner_profile_url,
-                'uploader id', fatal=False) if owner_profile_url else None,
-            'uploader_url': owner_profile_url,
+            'uploader': uploader,
             'channel_id': channel_id,
-            'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
             'duration': duration,
             'view_count': int_or_none(
                 video_details.get('viewCount')
@@ -2257,6 +2304,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 initial_data,
                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
                 list) or []
+            if not info['channel_id']:
+                channel_id = self._extract_channel_id('', renderers=contents)
+            if not info['uploader']:
+                info['uploader'] = self._extract_author_var('', 'name', renderers=contents)
+            if not owner_profile_url:
+                owner_profile_url = self._yt_urljoin(self._extract_author_var('', 'url', renderers=contents))
+
             for content in contents:
                 vpir = content.get('videoPrimaryInfoRenderer')
                 if vpir:
@@ -2304,10 +2358,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         })
                 vsir = content.get('videoSecondaryInfoRenderer')
                 if vsir:
-                    info['channel'] = get_text(try_get(
-                        vsir,
-                        lambda x: x['owner']['videoOwnerRenderer']['title'],
-                        dict))
                     rows = try_get(
                         vsir,
                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
@@ -2365,7 +2415,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         self.mark_watched(video_id, player_response)
 
-        return info
+        return merge_dicts(
+            info, {
+                'uploader_id': self._extract_uploader_id(owner_profile_url),
+                'uploader_url': owner_profile_url,
+                'channel_id': channel_id,
+                'channel_url': channel_id and self._yt_urljoin('/channel/' + channel_id),
+                'channel': info['uploader'],
+            })
 
 
 class YoutubeTabIE(YoutubeBaseInfoExtractor):
@@ -2394,6 +2451,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'description': 'Short clips from Super Cooper Sundays!',
             'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
             'title': 'Super Cooper Shorts - Shorts',
+            'uploader': 'Super Cooper Shorts',
+            'uploader_id': '@SuperCooperShorts',
         }
     }, {
         # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
@@ -2404,14 +2463,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Emergency Awesome - Home',
         },
         'playlist_mincount': 5,
+        'skip': 'new test page needed to replace `Emergency Awesome - Shorts`',
     }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Игорь Клейнер - Playlists',
+            'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+            'uploader': 'Igor Kleiner',
+            'uploader_id': '@IgorDataScience',
         },
     }, {
         # playlists, multipage, different order
@@ -2419,8 +2481,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Игорь Клейнер - Playlists',
+            'title': 'Igor Kleiner - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
+            'uploader': 'Igor Kleiner',
+            'uploader_id': '@IgorDataScience',
         },
     }, {
         # playlists, series
@@ -2430,6 +2494,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Playlists',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
+            'uploader': '3Blue1Brown',
+            'uploader_id': '@3blue1brown',
         },
     }, {
         # playlists, singlepage
@@ -2439,6 +2505,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
             'title': 'ThirstForScience - Playlists',
             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
+            'uploader': 'ThirstForScience',
+            'uploader_id': '@ThirstForScience',
         }
     }, {
         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
@@ -2447,20 +2515,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         # basic, single video playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
             'title': 'youtube-dl public playlist',
+            'uploader': 'Sergey M.',
+            'uploader_id': '@sergeym.6173',
+            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
         },
         'playlist_count': 1,
     }, {
         # empty playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
         'info_dict': {
-            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
-            'uploader': 'Sergey M.',
             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
             'title': 'youtube-dl empty playlist',
+            'uploader': 'Sergey M.',
+            'uploader_id': '@sergeym.6173',
+            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
         },
         'playlist_count': 0,
     }, {
@@ -2470,6 +2540,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Home',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 2,
     }, {
@@ -2479,6 +2551,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 975,
     }, {
@@ -2488,6 +2562,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Videos',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 199,
     }, {
@@ -2497,6 +2573,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Playlists',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 17,
     }, {
@@ -2506,6 +2584,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Community',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 18,
     }, {
@@ -2515,8 +2595,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
             'title': 'lex will - Channels',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
+            'uploader': 'lex will',
+            'uploader_id': '@lexwill718',
         },
-        'playlist_mincount': 138,
+        'playlist_mincount': 75,
     }, {
         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
@@ -2533,7 +2615,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': '29C3: Not my department',
             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
             'uploader': 'Christiaan008',
-            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
+            'uploader_id': '@ChRiStIaAn008',
+            'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
         },
         'playlist_count': 96,
     }, {
@@ -2543,7 +2626,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Uploads from Cauchemar',
             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
             'uploader': 'Cauchemar',
-            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+            'uploader_id': '@Cauchemar89',
+            'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
         },
         'playlist_mincount': 1123,
     }, {
@@ -2557,7 +2641,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Uploads from Interstellar Movie',
             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
             'uploader': 'Interstellar Movie',
-            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
+            'uploader_id': '@InterstellarMovie',
+            'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincount': 21,
     }, {
@@ -2566,8 +2651,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'info_dict': {
             'title': 'Data Analysis with Dr Mike Pound',
             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
-            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
             'uploader': 'Computerphile',
+            'uploader_id': '@Computerphile',
+            'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
         },
         'playlist_mincount': 11,
     }, {
@@ -2605,14 +2691,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
         'info_dict': {
-            'id': '9Auq9mYxFEE',
+            'id': r're:[\da-zA-Z_-]{8,}',
             'ext': 'mp4',
-            'title': 'Watch Sky News live',
+            'title': r're:(?s)[A-Z].{20,}',
             'uploader': 'Sky News',
-            'uploader_id': 'skynews',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
-            'upload_date': '20191102',
-            'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+            'uploader_id': '@SkyNews',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@SkyNews',
+            'upload_date': r're:\d{8}',
+            'description': r're:(?s)(?:.*\n)+SUBSCRIBE to our YouTube channel for more videos: http://www\.youtube\.com/skynews *\n.*',
             'categories': ['News & Politics'],
             'tags': list,
             'like_count': int,
@@ -2701,34 +2787,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'note': 'Search tab',
         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
-        'playlist_mincount': 40,
+        'playlist_mincount': 20,
         'info_dict': {
             'id': 'UCYO_jab_esuFRV4b17AJtAw',
             'title': '3Blue1Brown - Search - linear algebra',
             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
             'uploader': '3Blue1Brown',
-            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
+            'uploader_id': '@3blue1brown',
+            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
         }
     }]
 
     @classmethod
     def suitable(cls, url):
-        return False if YoutubeIE.suitable(url) else super(
+        return not YoutubeIE.suitable(url) and super(
             YoutubeTabIE, cls).suitable(url)
 
-    def _extract_channel_id(self, webpage):
-        channel_id = self._html_search_meta(
-            'channelId', webpage, 'channel id', default=None)
-        if channel_id:
-            return channel_id
-        channel_url = self._html_search_meta(
-            ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
-             'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
-             'twitter:app:url:googleplay'), webpage, 'channel url')
-        return self._search_regex(
-            r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
-            channel_url, 'channel id')
-
     @staticmethod
     def _extract_grid_item_renderer(item):
         assert isinstance(item, dict)
@@ -3116,27 +3190,18 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         else:
             raise ExtractorError('Unable to find selected tab')
 
-    @staticmethod
-    def _extract_uploader(data):
+    def _extract_uploader(self, metadata, data):
         uploader = {}
-        sidebar_renderer = try_get(
-            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
-        if sidebar_renderer:
-            for item in sidebar_renderer:
-                if not isinstance(item, dict):
-                    continue
-                renderer = item.get('playlistSidebarSecondaryInfoRenderer')
-                if not isinstance(renderer, dict):
-                    continue
-                owner = try_get(
-                    renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
-                if owner:
-                    uploader['uploader'] = owner.get('text')
-                    uploader['uploader_id'] = try_get(
-                        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
-                    uploader['uploader_url'] = urljoin(
-                        'https://www.youtube.com/',
-                        try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+        renderers = traverse_obj(data,
+                                 ('sidebar', 'playlistSidebarRenderer', 'items'))
+        uploader['channel_id'] = self._extract_channel_id('', metadata=metadata, renderers=renderers)
+        uploader['uploader'] = (
+            self._extract_author_var('', 'name', renderers=renderers)
+            or self._extract_author_var('', 'name', metadata=metadata))
+        uploader['uploader_url'] = self._yt_urljoin(
+            self._extract_author_var('', 'url', metadata=metadata, renderers=renderers))
+        uploader['uploader_id'] = self._extract_uploader_id(uploader['uploader_url'])
+        uploader['channel'] = uploader['uploader']
         return uploader
 
     @staticmethod
@@ -3187,8 +3252,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             self._entries(selected_tab, item_id, webpage),
             playlist_id=playlist_id, playlist_title=title,
             playlist_description=description)
-        playlist.update(self._extract_uploader(data))
-        return playlist
+        return merge_dicts(playlist, self._extract_uploader(renderer, data))
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
         title = playlist.get('title') or try_get(
@@ -3275,8 +3339,9 @@ class YoutubePlaylistIE(InfoExtractor):
         'info_dict': {
             'title': '[OLD]Team Fortress 2 (Class-based LP)',
             'id': 'PLBB231211A4F62143',
-            'uploader': 'Wickydoo',
-            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
+            'uploader': 'Wickman',
+            'uploader_id': '@WickmanVT',
+            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
         },
         'playlist_mincount': 29,
     }, {
@@ -3290,21 +3355,25 @@ class YoutubePlaylistIE(InfoExtractor):
     }, {
         'note': 'embedded',
         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
-        'playlist_count': 4,
+        # TODO: full playlist requires _reload_with_unavailable_videos()
+        # 'playlist_count': 4,
+        'playlist_mincount': 1,
         'info_dict': {
             'title': 'JODA15',
             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
             'uploader': 'milan',
-            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+            'uploader_id': '@milan5503',
+            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
         }
     }, {
         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
-        'playlist_mincount': 982,
+        'playlist_mincount': 455,
         'info_dict': {
             'title': '2018 Chinese New Singles (11/6 updated)',
             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
             'uploader': 'LBK',
-            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
+            'uploader_id': '@music_king',
+            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
         }
     }, {
         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
@@ -3342,8 +3411,8 @@ class YoutubeYtBeIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Small Scale Baler and Braiding Rugs',
             'uploader': 'Backus-Page House Museum',
-            'uploader_id': 'backuspagemuseum',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+            'uploader_id': '@backuspagemuseum',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
             'upload_date': '20161008',
             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
             'categories': ['Nonprofits & Activism'],

From 3da17834a49fad2a97c308fdd89aa26781ef4d60 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 28 Feb 2023 23:03:44 +0530
Subject: [PATCH 192/312] [Youtube] Construct dash formats with `range` query

See yt-dlp/yt_dlp#6369
---
 youtube_dl/extractor/youtube.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4246d84f9..89711c84e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1694,8 +1694,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if n_response is None:
                 # give up if descrambling failed
                 break
-            fmt['url'] = update_url(
-                parsed_fmt_url, query_update={'n': [n_response]})
+            for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
+                fmt_dct['url'] = update_url(
+                    fmt_dct['url'], query_update={'n': [n_response]})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -2047,10 +2048,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if no_video:
                 dct['abr'] = tbr
             if no_audio or no_video:
-                dct['downloader_options'] = {
-                    # Youtube throttles chunks >~10M
-                    'http_chunk_size': 10485760,
-                }
+                CHUNK_SIZE = 10 << 20
+                # avoid Youtube throttling
+                dct.update({
+                    'protocol': 'http_dash_segments',
+                    'fragments': [{
+                        'url': update_url_query(dct['url'], {
+                            'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
+                        })
+                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
+                } if dct['filesize'] else {
+                    'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
+                })
+
                 if dct.get('ext'):
                     dct['container'] = dct['ext'] + '_dash'
             formats.append(dct)

From 3e92c60fcd94c37428d57153dbdd14cd0a1f9226 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 3 Mar 2023 16:48:54 +0530
Subject: [PATCH 193/312] [jsinterp] Handle `Date` at epoch 0

See yt-dlp/yt_dlp#6400
---
 test/test_youtube_signature.py | 4 ++++
 youtube_dl/jsinterp.py         | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index ac37ffa45..decf7ee38 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -67,6 +67,10 @@ _SIG_TESTS = [
 ]
 
 _NSIG_TESTS = [
+    (
+        'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
+        'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
+    ),
     (
         'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
         'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a3bc42a61..e28670a3f 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -405,7 +405,7 @@ class JSInterpreter(object):
                 left, right = self._separate_at_paren(obj[len(klass):])
                 argvals = self.interpret_iter(left, local_vars, allow_recursion)
                 expr = konstr(*argvals)
-                if not expr:
+                if expr is None:
                     raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
                 expr = self._dump(expr, local_vars) + right
                 break

From 040271022709c4d20d33c604d1dbc72dc2da472d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 5 Mar 2023 23:07:07 +0000
Subject: [PATCH 194/312] [jsinterp] Fix regexp parsing and .replace[All]
 method

 * For performance, make regexp object instantiation lazy
 * Other small performance improvements
---
 test/test_jsinterp.py  | 46 ++++++++++++++++++-----
 youtube_dl/jsinterp.py | 84 ++++++++++++++++++++++++++++--------------
 2 files changed, 93 insertions(+), 37 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index b5962356c..5d129433d 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -139,21 +139,16 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
     def test_Date(self):
-        jsi = JSInterpreter('''
-        function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 86000)
-
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
         # date format m/d/y
-        jsi = JSInterpreter('''
-        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 86000)
+        self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000)
+
+        # epoch 0
+        self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0)
 
     def test_call(self):
         jsi = JSInterpreter('''
@@ -445,7 +440,7 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertIs(jsi.call_function('x'), None)
 
         jsi = JSInterpreter('''
-        function x() { let a=/,,[/,913,/](,)}/; return a; }
+        function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
         ''')
         attrs = set(('findall', 'finditer', 'flags', 'groupindex',
                      'groups', 'match', 'pattern', 'scanner',
@@ -457,6 +452,31 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace("data-", ""); return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(/^.+-/, ""); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'name')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replace(/a/g, "o"); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'doto-nome')
+
+        jsi = JSInterpreter(r'''
+        function x() { let a="data-name".replaceAll("a", "o"); return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 'doto-nome')
+
         jsi = JSInterpreter(r'''
         function x() { let a=[/[)\\]/]; return a[0]; }
         ''')
@@ -485,6 +505,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    """ # fails so far
+    def test_packed(self):
+        jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
+        self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
+    """
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index e28670a3f..ab7d6f926 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -12,9 +12,11 @@ from .utils import (
     js_to_json,
     remove_quotes,
     unified_timestamp,
+    variadic,
 )
 from .compat import (
     compat_basestring,
+    compat_chr,
     compat_collections_chain_map as ChainMap,
     compat_itertools_zip_longest as zip_longest,
     compat_str,
@@ -205,10 +207,10 @@ class JSInterpreter(object):
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
     class JS_RegExp(object):
-        _RE_FLAGS = {
+        RE_FLAGS = {
             # special knowledge: Python's re flags are bitmask values, current max 128
             # invent new bitmask values well above that for literal parsing
-            # TODO: new pattern class to execute matches with these flags
+            # TODO: execute matches with these flags (remaining: d, y)
             'd': 1024,  # Generate indices for substring matches
             'g': 2048,  # Global search
             'i': re.I,  # Case-insensitive search
@@ -218,12 +220,19 @@ class JSInterpreter(object):
             'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
         }
 
-        def __init__(self, pattern_txt, flags=''):
+        def __init__(self, pattern_txt, flags=0):
             if isinstance(flags, compat_str):
                 flags, _ = self.regex_flags(flags)
-            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
             # First, avoid https://github.com/python/cpython/issues/74534
-            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            self.__self = None
+            self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
+            self.__flags = flags
+
+        def __instantiate(self):
+            if self.__self:
+                return
+            self.__self = re.compile(self.__pattern_txt, self.__flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
             for name in dir(self.__self):
                 # Only these? Obviously __class__, __init__.
                 # PyPy creates a __weakref__ attribute with value None
@@ -232,15 +241,21 @@ class JSInterpreter(object):
                     continue
                 setattr(self, name, getattr(self.__self, name))
 
+        def __getattr__(self, name):
+            self.__instantiate()
+            if hasattr(self, name):
+                return getattr(self, name)
+            return super(JSInterpreter.JS_RegExp, self).__getattr__(name)
+
         @classmethod
         def regex_flags(cls, expr):
             flags = 0
             if not expr:
                 return flags, expr
             for idx, ch in enumerate(expr):
-                if ch not in cls._RE_FLAGS:
+                if ch not in cls.RE_FLAGS:
                     break
-                flags |= cls._RE_FLAGS[ch]
+                flags |= cls.RE_FLAGS[ch]
             return flags, expr[idx + 1:]
 
     @classmethod
@@ -265,17 +280,17 @@ class JSInterpreter(object):
         counters = dict((k, 0) for k in _MATCHING_PARENS.values())
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
-        after_op, in_regex_char_group, skip_re = True, False, 0
+        after_op, in_regex_char_group = True, False
 
         for idx, char in enumerate(expr):
-            if skip_re > 0:
-                skip_re -= 1
-                continue
+            paren_delta = 0
             if not in_quote:
                 if char in _MATCHING_PARENS:
                     counters[_MATCHING_PARENS[char]] += 1
+                    paren_delta = 1
                 elif char in counters:
                     counters[char] -= 1
+                    paren_delta = -1
             if not escaping:
                 if char in _QUOTES and in_quote in (char, None):
                     if in_quote or after_op or char != '/':
@@ -283,7 +298,7 @@ class JSInterpreter(object):
                 elif in_quote == '/' and char in '[]':
                     in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace()))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -293,7 +308,7 @@ class JSInterpreter(object):
                 continue
             elif pos == 0 and skip_delims:
                 here = expr[idx:]
-                for s in skip_delims if isinstance(skip_delims, (list, tuple)) else [skip_delims]:
+                for s in variadic(skip_delims):
                     if here.startswith(s) and s:
                         skipping = len(s) - 1
                         break
@@ -316,7 +331,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -361,6 +376,20 @@ class JSInterpreter(object):
         except TypeError:
             return self._named_object(namespace, obj)
 
+    # used below
+    _VAR_RET_THROW_RE = re.compile(r'''(?x)
+        (?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
+        ''')
+    _COMPOUND_RE = re.compile(r'''(?x)
+        (?P<try>try)\s*\{|
+        (?P<if>if)\s*\(|
+        (?P<switch>switch)\s*\(|
+        (?P<for>for)\s*\(|
+        (?P<while>while)\s*\(
+        ''')
+    _FINALLY_RE = re.compile(r'finally\s*\{')
+    _SWITCH_RE = re.compile(r'switch\s*\(')
+
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise self.Exception('Recursion limit reached')
@@ -375,7 +404,7 @@ class JSInterpreter(object):
             if should_return:
                 return ret, should_return
 
-        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
+        m = self._VAR_RET_THROW_RE.match(stmt)
         if m:
             expr = stmt[len(m.group(0)):].strip()
             if m.group('throw'):
@@ -447,13 +476,7 @@ class JSInterpreter(object):
                 for item in self._separate(inner)])
             expr = name + outer
 
-        m = re.match(r'''(?x)
-                (?P<try>try)\s*\{|
-                (?P<if>if)\s*\(|
-                (?P<switch>switch)\s*\(|
-                (?P<for>for)\s*\(|
-                (?P<while>while)\s*\(
-                ''', expr)
+        m = self._COMPOUND_RE.match(expr)
         md = m.groupdict() if m else {}
         if md.get('if'):
             cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
@@ -512,7 +535,7 @@ class JSInterpreter(object):
                     err = None
                     pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
 
-            m = re.match(r'finally\s*\{', expr)
+            m = self._FINALLY_RE.match(expr)
             if m:
                 sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                 ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
@@ -531,7 +554,7 @@ class JSInterpreter(object):
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
-                switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
+                switch_m = self._SWITCH_RE.match(remaining)  # FIXME
                 if switch_m:
                     switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                     body, expr = self._separate_at_paren(remaining, '}')
@@ -735,7 +758,7 @@ class JSInterpreter(object):
                 if obj == compat_str:
                     if member == 'fromCharCode':
                         assertion(argvals, 'takes one or more arguments')
-                        return ''.join(map(chr, argvals))
+                        return ''.join(map(compat_chr, argvals))
                     raise self.Exception('Unsupported string method ' + member, expr=expr)
                 elif obj == float:
                     if member == 'pow':
@@ -808,10 +831,17 @@ class JSInterpreter(object):
                     if idx >= len(obj):
                         return None
                     return ord(obj[idx])
-                elif member == 'replace':
+                elif member in ('replace', 'replaceAll'):
                     assertion(isinstance(obj, compat_str), 'must be applied on a string')
                     assertion(len(argvals) == 2, 'takes exactly two arguments')
-                    return re.sub(argvals[0], argvals[1], obj)
+                    # TODO: argvals[1] callable, other Py vs JS edge cases
+                    if isinstance(argvals[0], self.JS_RegExp):
+                        count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1
+                        assertion(member != 'replaceAll' or count == 0,
+                                  'replaceAll must be called with a global RegExp')
+                        return argvals[0].sub(argvals[1], obj, count=count)
+                    count = ('replaceAll', 'replace').index(member)
+                    return re.sub(re.escape(argvals[0]), argvals[1], obj, count=count)
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)

From 27d41d73655b8fbf2dedf88cac96220520d526b5 Mon Sep 17 00:00:00 2001
From: Sophira <github@theblob.org>
Date: Tue, 7 Mar 2023 15:49:31 +0000
Subject: [PATCH 195/312] [doc] Recommend "Get cookies.txt LOCALLY" extension
 in README.md (#31763)

* remove link to suspect "Get cookies.txt" extension, dropped from Chrome store
* link to new Manifest V3-compatible open-source "Get cookies.txt LOCALLY" extension.

Fixes #31465.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6e07ddb1c..227e34046 100644
--- a/README.md
+++ b/README.md
@@ -918,7 +918,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
 
 Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
 
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
+In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
 
 Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
 

From 8c86fd33dca48ebb505ed04150d9e35993b9fe7e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 9 Mar 2023 16:40:30 +0000
Subject: [PATCH 196/312] [doc] Improve "guidance" on bug reporting

---
 README.md | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 227e34046..14a3d6c86 100644
--- a/README.md
+++ b/README.md
@@ -1408,7 +1408,11 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported in the issue tracker: <https://github.com/ytdl-org/youtube-dl/issues> (<https://yt-dl.org/bug> is an alias for this). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+
+## Opening a bug report or suggestion
+
+Be sure to follow instructions provided **below** and **in the issue tracker**. Complete the appropriate issue template fully. Consider whether your problem is covered by an existing issue: if so, follow the discussion there. Avoid commenting on existing duplicate issues as such comments do not add to the discussion of the issue and are liable to be treated as spam.
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
@@ -1428,17 +1432,17 @@ $ youtube-dl -v <your command line>
 
 The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
-Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+Finally please review your issue to avoid various common mistakes (you can and should use this as a checklist) listed below.
 
 ### Is the description of the issue itself sufficient?
 
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+We often get issue reports that are hard to understand. To avoid subsequent clarifications, and to assist participants who are not native English speakers, please elaborate on what feature you are requesting, or what bug you want to be fixed.
 
-So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+Make sure that it's obvious
 
 - What the problem is
 - How it could be fixed
-- How your proposed solution would look like
+- How your proposed solution would look
 
 If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
 
@@ -1448,14 +1452,14 @@ If your server has multiple IPs or you suspect censorship, adding `--call-home`
 
 **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
+###  Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. Initially, at least, use the search term `-label:duplicate` to focus on active issues. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
 ###  Are you using the latest version?
 
 Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
 
-###  Is the issue already documented?
-
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
-
 ###  Why are existing options not enough?
 
 Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.

From 5c985d4f81a43ada75dafb23233e7fe39913907a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Mar 2023 12:09:55 +0000
Subject: [PATCH 197/312] [downloader] Let _ffmpeg_ handle DASH segments

Fixes https://github.com/ytdl-org/youtube-dl/issues/31792 after 3da1783.
---
 youtube_dl/downloader/external.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index bffcd10b6..1b6bd1fa2 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -273,7 +273,7 @@ class HttpieFD(ExternalFD):
 class FFmpegFD(ExternalFD):
     @classmethod
     def supports(cls, info_dict):
-        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
 
     @classmethod
     def available(cls):

From baa6c5e95cb307e7d716645780ff8aef22de6aca Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 11 Mar 2023 12:17:00 +0000
Subject: [PATCH 198/312] [FragmentFD] Respect `--no-continue`

* discard partial fragment on `--no-continue`
* continue with correct progress display otherwise

Resolves #21467
---
 youtube_dl/downloader/common.py   | 24 +++++++++++-----
 youtube_dl/downloader/dash.py     | 10 +++----
 youtube_dl/downloader/fragment.py | 46 +++++++++++++++++++++----------
 youtube_dl/downloader/http.py     | 15 ++++------
 4 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 1cdba89cd..c86ce2aa5 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -88,17 +88,21 @@ class FileDownloader(object):
             return '---.-%'
         return '%6s' % ('%3.1f%%' % percent)
 
-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+        if len(args) < 2:
+            rate, remaining = (start_or_rate, now_or_remaining)
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+        start, now = (start_or_rate, now_or_remaining)
+        total, current = args
         if total is None:
             return None
         if now is None:
             now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)
 
     @staticmethod
     def format_eta(eta):
@@ -123,6 +127,12 @@ class FileDownloader(object):
     def format_retries(retries):
         return 'inf' if retries == float('inf') else '%.0f' % retries
 
+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        fn = encodeFilename(unencoded_filename)
+        if os.path.isfile(fn):
+            return os.path.getsize(fn)
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index c6d674bc6..cc30485f8 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -38,8 +38,7 @@ class DashSegmentsFD(FragmentFD):
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = i == 0 or not skip_unavailable_fragments
-            count = 0
-            while count <= fragment_retries:
+            for count in range(fragment_retries + 1):
                 try:
                     fragment_url = fragment.get('url')
                     if not fragment_url:
@@ -57,9 +56,8 @@ class DashSegmentsFD(FragmentFD):
                     # is usually enough) thus allowing to download the whole file successfully.
                     # To be future-proof we will retry all fragments that fail with any
                     # HTTP error.
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                    if count < fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
                 except DownloadError:
                     # Don't retry fragment if error occurred during HTTP downloading
                     # itself since it has own retry settings
@@ -68,7 +66,7 @@ class DashSegmentsFD(FragmentFD):
                         break
                     raise
 
-            if count > fragment_retries:
+            if count >= fragment_retries:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
                     continue
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 35c76feba..913e91b64 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
 
     @staticmethod
     def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+        return ctx['live'] is not True and ctx['tmpfilename'] != '-'
 
     def _read_ytdl_file(self, ctx):
         assert 'ytdl_corrupt' not in ctx
@@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
             'url': frag_url,
             'http_headers': headers or info_dict.get('http_headers'),
         }
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(
+                self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = frag_resume_len
+        ctx['frag_resume_len'] = frag_resume_len or 0
+
         success = ctx['dl'].download(fragment_filename, fragment_info_dict)
         if not success:
             return False, None
@@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
             total_frags_str = '%d' % ctx['total_frags']
             ad_frags = ctx.get('ad_frags', 0)
             if ad_frags:
@@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
         self.to_screen(
             '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
         self.report_destination(ctx['filename'])
+        continuedl = self.params.get('continuedl', True)
         dl = HttpQuietDownloader(
             self.ydl,
             {
-                'continuedl': True,
+                'continuedl': continuedl,
                 'quiet': True,
                 'noprogress': True,
                 'ratelimit': self.params.get('ratelimit'),
@@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
         )
         tmpfilename = self.temp_name(ctx['filename'])
         open_mode = 'wb'
-        resume_len = 0
 
         # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename) or 0
+        if resume_len > 0:
             open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
 
         # Should be initialized before ytdl file check
         ctx.update({
@@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
         })
 
         if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+            if continuedl and ytdl_file_exists:
                 self._read_ytdl_file(ctx)
                 is_corrupt = ctx.get('ytdl_corrupt') is True
                 is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
                     if 'ytdl_corrupt' in ctx:
                         del ctx['ytdl_corrupt']
                     self._write_ytdl_file(ctx)
+
             else:
+                if not continuedl:
+                    if ytdl_file_exists:
+                        self._read_ytdl_file(ctx)
+                    ctx['fragment_index'] = resume_len = 0
                 self._write_ytdl_file(ctx)
                 assert ctx['fragment_index'] == 0
 
@@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
         start = time.time()
         ctx.update({
             'started': start,
+            'fragment_started': start,
             # Amount of fragment's bytes downloaded by the time of the previous
             # frag progress hook invocation
             'prev_frag_downloaded_bytes': 0,
@@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
             if s['status'] not in ('downloading', 'finished'):
                 return
 
+            if not total_frags and ctx.get('fragment_count'):
+                state['fragment_count'] = ctx['fragment_count']
+
             time_now = time.time()
             state['elapsed'] = time_now - start
             frag_total_bytes = s.get('total_bytes') or 0
@@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
                 ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_total_bytes)
+                ctx['fragment_started'] = time.time()
                 ctx['prev_frag_downloaded_bytes'] = 0
             else:
                 frag_downloaded_bytes = s['downloaded_bytes']
                 state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
                 if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
-                state['speed'] = s.get('speed') or ctx.get('speed')
-                ctx['speed'] = state['speed']
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
@@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
                         os.utime(ctx['filename'], (time.time(), filetime))
                     except Exception:
                         pass
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
 
         self._hook_progress({
             'downloaded_bytes': downloaded_bytes,
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index d8ac41dcc..440471aa0 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
-                ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+            ctx.resume_len = info_dict.get('frag_resume_len')
+            if ctx.resume_len is None:
+                ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
 
         ctx.is_resume = ctx.resume_len > 0
 
@@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
                         raise RetryDownload(err)
                     raise err
                 # When trying to resume, Content-Range HTTP header of response has to be checked
-                # to match the value of requested Range HTTP header. This is due to a webservers
+                # to match the value of requested Range HTTP header. This is due to webservers
                 # that don't support resuming and serve a whole file with no Content-Range
-                # set in response despite of requested Range (see
+                # set in response despite requested Range (see
                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                 if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
@@ -293,10 +293,7 @@ class HttpFD(FileDownloader):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if ctx.data_len is None:
-                    eta = None
-                else:
-                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
 
                 self._hook_progress({
                     'status': 'downloading',

From e8de54bce50f6f77a4d7e8e80675f7003d5bf630 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 13 Mar 2023 19:45:54 +0000
Subject: [PATCH 199/312] [core] Handle `/../` sequences in HTTP URLs

* use Python's RFC implementation for embedded sequences
* hack: strip unbalanced leading `../` from path, like eg Firefox

See https://github.com/yt-dlp/yt-dlp/issues/3355
---
 youtube_dl/YoutubeDL.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 8e8546596..bcf781744 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,7 @@ from .compat import (
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
 )
@@ -60,6 +61,7 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     ISO3166Utils,
     locked_file,
@@ -74,6 +76,7 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
+    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2297,6 +2300,27 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+            # get a new Request with the munged URL
+            if url != req.get_full_url():
+                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                    req.get_method(), compat_urllib_request.Request)
+                req = req_type(
+                    url, data=req.data, headers=dict(req.header_items()),
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):

From 70ff01391068c98b4377c5cc17a8d00d5645e734 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Mar 2023 00:58:59 +0000
Subject: [PATCH 200/312] [devscripts] Add a hack to convert command-line
 options to API options

---
 devscripts/cli_to_api.py | 64 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100755 devscripts/cli_to_api.py

diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
new file mode 100755
index 000000000..2f4d6a458
--- /dev/null
+++ b/devscripts/cli_to_api.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+"""
+This script displays the API parameters corresponding to a yt-dl command line
+
+Example:
+$ ./cli_to_api.py -f best
+{u'format': 'best'}
+$
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import youtube_dl
+from types import MethodType
+
+
+def cli_to_api(*opts):
+    YDL = youtube_dl.YoutubeDL
+
+    # to extract the parsed options, break out of YoutubeDL instantiation
+
+    # return options via this Exception
+    class ParseYTDLResult(Exception):
+        def __init__(self, result):
+            super(ParseYTDLResult, self).__init__('result')
+            self.opts = result
+
+    # replacement constructor that raises ParseYTDLResult
+    def ytdl_init(ydl, ydl_opts):
+        super(YDL, ydl).__init__(ydl_opts)
+        raise ParseYTDLResult(ydl_opts)
+
+    # patch in the constructor
+    YDL.__init__ = MethodType(ytdl_init, YDL)
+
+    # core parser
+    def parsed_options(argv):
+        try:
+            youtube_dl._real_main(list(argv))
+        except ParseYTDLResult as result:
+            return result.opts
+
+    # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
+    default = parsed_options([])
+    diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
+    if 'postprocessors' in diff:
+        diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
+    return diff
+
+
+def main():
+    from pprint import pprint
+    pprint(cli_to_api(*sys.argv))
+
+
+if __name__ == '__main__':
+    main()

From 6fece0a96b3cd8677f5c1185a57c6e21403fcb44 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 14 Mar 2023 13:01:32 +0000
Subject: [PATCH 201/312] [AENetworksBaseIE] Report missing show data instead
 of crash

---
 youtube_dl/extractor/aenetworks.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 2a1f08e39..59fbe048a 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -8,6 +8,8 @@ from ..utils import (
     ExtractorError,
     GeoRestrictedError,
     int_or_none,
+    remove_start,
+    traverse_obj,
     update_url_query,
     urlencode_postdata,
 )
@@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
     }
 
     def _extract_aen_smil(self, smil_url, video_id, auth=None):
-        query = {'mbr': 'true'}
+        query = {
+            'mbr': 'true',
+            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+        }
         if auth:
             query['auth'] = auth
         TP_SMIL_QUERY = [{
             'assetTypes': 'high_video_ak',
-            'switch': 'hls_high_ak'
+            'switch': 'hls_high_ak',
         }, {
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_s3',
         }, {
             'assetTypes': 'high_video_s3',
             'switch': 'hls_high_fastly',
@@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
         requestor_id, brand = self._DOMAIN_MAP[domain]
         result = self._download_json(
             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
         title = result['title']
         video_id = result['id']
         media_url = result['publicUrl']
@@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
     }, {
         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
         'info_dict': {
@@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
     }, {
         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
         'only_matching': True

From 45495228b7a6728b7e764bbcf1f38490cd3d8697 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Mar 2023 00:51:44 +0000
Subject: [PATCH 202/312] [downloader/http] Only check for resumability when
 actually resuming

---
 test/test_downloader_http.py  | 2 +-
 youtube_dl/downloader/http.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 4e6d7a2a0..6af86ae48 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -88,7 +88,7 @@ class TestHttpFD(unittest.TestCase):
         self.assertTrue(downloader.real_download(filename, {
             'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
         }))
-        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
         try_rm(encodeFilename(filename))
 
     def download_all(self, params):
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 440471aa0..28a49b9e8 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -141,7 +141,8 @@ class HttpFD(FileDownloader):
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
-                    self.report_unable_to_resume()
+                    if range_start > 0:
+                        self.report_unable_to_resume()
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
                 ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))

From f35b757c826027ab5263d431bbe363c6403bd66d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Mar 2023 02:27:46 +0000
Subject: [PATCH 203/312] [utils] Ensure `allow_types` for `variadic()` is a
 tuple

---
 test/test_utils.py  | 1 +
 youtube_dl/utils.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index ea2b96ed2..b85d397d0 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1563,6 +1563,7 @@ Line 1
         self.assertEqual(variadic(None), (None, ))
         self.assertEqual(variadic('spam'), ('spam', ))
         self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+        self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
 
     def test_traverse_obj(self):
         _TEST_DATA = {
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 761edcd49..f3c7af437 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4213,6 +4213,8 @@ def multipart_encode(data, boundary=None):
 
 
 def variadic(x, allowed_types=(compat_str, bytes, dict)):
+    if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable):
+        allowed_types = tuple(allowed_types)
     return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
 
 

From 88f28f620bcae7ba7302f8b049b74f0f8a12831f Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 12 Mar 2023 14:46:09 +0530
Subject: [PATCH 204/312] [extractor/youtube] Construct fragment list lazily

Ref: yt-dlp/yt-dlp/commit/e389d17
See: yt-dlp/yt-dlp#6517
---
 youtube_dl/extractor/youtube.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 89711c84e..6b153193c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     get_element_by_attribute,
     int_or_none,
     js_to_json,
+    LazyList,
     merge_dicts,
     mimetype2ext,
     parse_codecs,
@@ -1986,9 +1987,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         itags = []
         itag_qualities = {}
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
+        CHUNK_SIZE = 10 << 20
+
         streaming_data = player_response.get('streamingData') or {}
         streaming_formats = streaming_data.get('formats') or []
         streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
+
+        def build_fragments(f):
+            return LazyList({
+                'url': update_url_query(f['url'], {
+                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
+                })
+            } for range_start in range(0, f['filesize'], CHUNK_SIZE))
+
         for fmt in streaming_formats:
             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                 continue
@@ -2048,15 +2059,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if no_video:
                 dct['abr'] = tbr
             if no_audio or no_video:
-                CHUNK_SIZE = 10 << 20
                 # avoid Youtube throttling
                 dct.update({
                     'protocol': 'http_dash_segments',
-                    'fragments': [{
-                        'url': update_url_query(dct['url'], {
-                            'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
-                        })
-                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
+                    'fragments': build_fragments(dct),
                 } if dct['filesize'] else {
                     'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
                 })

From 3f6d2bd76f3393eef90896dfabc2d8dde37c2009 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 9 Mar 2023 22:09:23 +0530
Subject: [PATCH 205/312] [extractor/youtube] Bypass throttling for `-f17`

and related cleanup

Thanks @AudricV for the finding

Ref: yt-dlp/yt-dlp/commit/c9abebb
---
 youtube_dl/extractor/youtube.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6b153193c..ae3416b20 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2052,13 +2052,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if mobj:
                     dct['ext'] = mimetype2ext(mobj.group(1))
                     dct.update(parse_codecs(mobj.group(2)))
-            no_audio = dct.get('acodec') == 'none'
-            no_video = dct.get('vcodec') == 'none'
-            if no_audio:
-                dct['vbr'] = tbr
-            if no_video:
-                dct['abr'] = tbr
-            if no_audio or no_video:
+            single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
+            if single_stream and dct.get('ext'):
+                dct['container'] = dct['ext'] + '_dash'
+            if single_stream or itag == '17':
                 # avoid Youtube throttling
                 dct.update({
                     'protocol': 'http_dash_segments',
@@ -2067,8 +2064,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
                 })
 
-                if dct.get('ext'):
-                    dct['container'] = dct['ext'] + '_dash'
             formats.append(dct)
 
         hls_manifest_url = streaming_data.get('hlsManifestUrl')

From cdf40b6aa651d949ce01e9bec1a11f792e8af899 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 3 Apr 2023 21:07:10 +0100
Subject: [PATCH 206/312] [test] Update tests for Ubuntu 20.04 * 18.04 test
 runner was withdrawn * for now, disable Py 3.3/3.4 tests

---
 .github/workflows/ci.yml | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a609f3704..51abdce1d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,9 +7,10 @@ jobs:
     strategy:
       fail-fast: true
       matrix:
-        os: [ubuntu-18.04]
+        os: [ubuntu-20.04]
         # TODO: python 2.6
-        python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        # TODO: restore support for 3.3, 3.4
+        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-impl: [cpython]
         ytdl-test-set: [core, download]
         run-tests-ext: [sh]
@@ -26,26 +27,27 @@ jobs:
           ytdl-test-set: download
           run-tests-ext: bat
         # jython
-        - os: ubuntu-18.04
+        - os: ubuntu-20.04
           python-impl: jython
           ytdl-test-set: core
           run-tests-ext: sh
-        - os: ubuntu-18.04
+        - os: ubuntu-20.04
           python-impl: jython
           ytdl-test-set: download
           run-tests-ext: sh
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      if: ${{ matrix.python-impl == 'cpython' }}
+    - uses: actions/checkout@v3
+    - name: Set up supported Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }}
       with:
         python-version: ${{ matrix.python-version }}
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
-      uses: actions/setup-java@v1
+      uses: actions/setup-java@v2
       with:
         java-version: 8
+        distribution: 'zulu'
     - name: Install Jython
       if: ${{ matrix.python-impl == 'jython' }}
       run: |
@@ -70,9 +72,9 @@ jobs:
     name: Linter
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9
     - name: Install flake8

From 557dbac173c30a51acd284b46f2d5460e539f51a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:29:24 +0100
Subject: [PATCH 207/312] [FragmentFD] Fix iteration with infinite limit

* fixes ytdl-org/youtube-dl/baa6c5e
* resolves #31885
---
 youtube_dl/downloader/dash.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index cc30485f8..67a8e173f 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import itertools
+
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
 from ..utils import (
@@ -30,15 +32,13 @@ class DashSegmentsFD(FragmentFD):
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        frag_index = 0
-        for i, fragment in enumerate(fragments):
-            frag_index += 1
+        for frag_index, fragment in enumerate(fragments, 1):
             if frag_index <= ctx['fragment_index']:
                 continue
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
-            fatal = i == 0 or not skip_unavailable_fragments
-            for count in range(fragment_retries + 1):
+            fatal = frag_index == 1 or not skip_unavailable_fragments
+            for count in itertools.count():
                 try:
                     fragment_url = fragment.get('url')
                     if not fragment_url:
@@ -48,7 +48,6 @@ class DashSegmentsFD(FragmentFD):
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)
-                    break
                 except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
                     # whole download to fail. However if the same fragment is immediately
@@ -58,13 +57,14 @@ class DashSegmentsFD(FragmentFD):
                     # HTTP error.
                     if count < fragment_retries:
                         self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
+                        continue
                 except DownloadError:
                     # Don't retry fragment if error occurred during HTTP downloading
-                    # itself since it has own retry settings
-                    if not fatal:
-                        self.report_skip_fragment(frag_index)
-                        break
-                    raise
+                    # itself since it has its own retry settings
+                    if fatal:
+                        raise
+                    self.report_skip_fragment(frag_index)
+                break
 
             if count >= fragment_retries:
                 if not fatal:

From 78da22489b483988e198a8352893df9c6cf34032 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:39:54 +0100
Subject: [PATCH 208/312] [compat] Add and use `compat_open()` like Py3
 `open()`

* resolves FIXME: ytdl-org/youtube-dl/commit/dfe5fa4
---
 youtube_dl/compat.py  | 11 +++++++++++
 youtube_dl/options.py |  6 ++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 39551f810..fe62caf80 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3127,6 +3127,16 @@ else:
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
 
+if sys.version_info < (3, 0):
+    # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
+    def compat_open(file_, *args, **kwargs):
+        if len(args) > 6 or 'opener' in kwargs:
+            raise ValueError('open: unsupported argument "opener"')
+        return io.open(file_, *args, **kwargs)
+else:
+    compat_open = open
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3185,6 +3195,7 @@ __all__ = [
     'compat_kwargs',
     'compat_map',
     'compat_numeric_types',
+    'compat_open',
     'compat_ord',
     'compat_os_name',
     'compat_os_path_expanduser',
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f6d2b0898..7b059b51e 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -11,6 +11,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_getenv,
     compat_kwargs,
+    compat_open as open,
     compat_shlex_split,
 )
 from .utils import (
@@ -41,14 +42,11 @@ def _hide_login_info(opts):
 def parseOpts(overrideArguments=None):
     def _readOptions(filename_bytes, default=[]):
         try:
-            optionf = open(filename_bytes)
+            optionf = open(filename_bytes, encoding=preferredencoding())
         except IOError:
             return default  # silently skip if file is not present
         try:
-            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
             contents = optionf.read()
-            if sys.version_info < (3,):
-                contents = contents.decode(preferredencoding())
             res = compat_shlex_split(contents, comments=True)
         finally:
             optionf.close()

From 25124bd640acf2fbae71b2a52738ee41da548fb1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:47:49 +0100
Subject: [PATCH 209/312] [devscripts] Improve hack to convert command-line
 options to API options

* define equality for DateRange
* don't show default DateRange
---
 devscripts/cli_to_api.py | 25 ++++++++++++++++++++++---
 youtube_dl/utils.py      |  4 ++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
index 2f4d6a458..9fb1d2ba8 100755
--- a/devscripts/cli_to_api.py
+++ b/devscripts/cli_to_api.py
@@ -49,15 +49,34 @@ def cli_to_api(*opts):
 
     # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
     default = parsed_options([])
-    diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
+
+    def neq_opt(a, b):
+        if a == b:
+            return False
+        if a is None and repr(type(object)).endswith(".utils.DateRange'>"):
+            return '0001-01-01 - 9999-12-31' != '{0}'.format(b)
+        return a != b
+
+    diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v))
     if 'postprocessors' in diff:
         diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
     return diff
 
 
 def main():
-    from pprint import pprint
-    pprint(cli_to_api(*sys.argv))
+    from pprint import PrettyPrinter
+
+    pprint = PrettyPrinter()
+    super_format = pprint.format
+
+    def format(object, context, maxlevels, level):
+        if repr(type(object)).endswith(".utils.DateRange'>"):
+            return '{0}: {1}>'.format(repr(object)[:-2], object), True, False
+        return super_format(object, context, maxlevels, level)
+
+    pprint.format = format
+
+    pprint.pprint(cli_to_api(*sys.argv))
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index f3c7af437..d80ceb007 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3190,6 +3190,10 @@ class DateRange(object):
     def __str__(self):
         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
 
+    def __eq__(self, other):
+        return (isinstance(other, DateRange)
+                and self.start == other.start and self.end == other.end)
+
 
 def platform_name():
     """ Returns the platform name as a compat_str """

From 9f4d83ff4255d8840c0fa9b367722c129ebecdb2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 5 Apr 2023 18:50:25 +0100
Subject: [PATCH 210/312] [options] Add --mtime option, unsets default
 --no-mtime

* resolves #1709 (!)
---
 youtube_dl/options.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 7b059b51e..d802b7e59 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -731,9 +731,13 @@ def parseOpts(overrideArguments=None):
         '--no-part',
         action='store_true', dest='nopart', default=False,
         help='Do not use .part files - write directly into output file')
+    filesystem.add_option(
+        '--mtime',
+        action='store_true', dest='updatetime', default=True,
+        help='Use the Last-modified header to set the file modification time (default)')
     filesystem.add_option(
         '--no-mtime',
-        action='store_false', dest='updatetime', default=True,
+        action='store_false', dest='updatetime',
         help='Do not use the Last-modified header to set the file modification time')
     filesystem.add_option(
         '--write-description',

From d6ae3b77cd50083ef245c28f904ee0b70a77d5c6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 14:11:18 +0100
Subject: [PATCH 211/312] [core] Avoid deepcopy of ctx dict (fix f35b757)

* may now contain `LazyList`s
* resolves #31999
---
 youtube_dl/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index bcf781744..2c0d4926c 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1399,7 +1399,7 @@ class YoutubeDL(object):
             filters = [self._build_format_filter(f) for f in selector.filters]
 
             def final_selector(ctx):
-                ctx_copy = copy.deepcopy(ctx)
+                ctx_copy = dict(ctx)
                 for _filter in filters:
                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                 return selector_function(ctx_copy)

From f8253a528935f78e1a3b724db8c1f0089f99314a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 19:42:36 +0100
Subject: [PATCH 212/312] [core] Avoid deepcopy of ctx dict (fix f35b757) (Pt
 2)

---
 youtube_dl/YoutubeDL.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2c0d4926c..927b19417 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1389,11 +1389,10 @@ class YoutubeDL(object):
                         'abr': formats_info[1].get('abr'),
                         'ext': output_ext,
                     }
-                video_selector, audio_selector = map(_build_selector_function, selector.selector)
 
                 def selector_function(ctx):
-                    for pair in itertools.product(
-                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+                    selector_fn = lambda x: _build_selector_function(x)(ctx)
+                    for pair in itertools.product(*map(selector_fn, selector.selector)):
                         yield _merge(pair)
 
             filters = [self._build_format_filter(f) for f in selector.filters]

From 213d1d91bfc4a00fefc72fa2730555d51060b42d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Apr 2023 19:49:46 +0100
Subject: [PATCH 213/312] [core] No longer importing copy

---
 youtube_dl/YoutubeDL.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 927b19417..2a1e59bf8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,7 +5,6 @@ from __future__ import absolute_import, unicode_literals
 
 import collections
 import contextlib
-import copy
 import datetime
 import errno
 import fileinput

From fe7e13066c20b10fe48bc154431440da36baec53 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 10 Apr 2023 17:12:31 +0100
Subject: [PATCH 214/312] [core] Add and use sanitize_info() method from yt-dlp

---
 youtube_dl/YoutubeDL.py | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2a1e59bf8..2719d546f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -30,9 +30,12 @@ from string import ascii_letters
 from .compat import (
     compat_basestring,
     compat_cookiejar,
+    compat_filter as filter,
     compat_get_terminal_size,
     compat_http_client,
+    compat_integer_types,
     compat_kwargs,
+    compat_map as map,
     compat_numeric_types,
     compat_os_name,
     compat_str,
@@ -64,6 +67,7 @@ from .utils import (
     int_or_none,
     ISO3166Utils,
     locked_file,
+    LazyList,
     make_HTTPS_handler,
     MaxDownloadsReached,
     orderedSet,
@@ -2109,10 +2113,36 @@ class YoutubeDL(object):
         return self._download_retcode
 
     @staticmethod
-    def filter_requested_info(info_dict):
-        return dict(
-            (k, v) for k, v in info_dict.items()
-            if k not in ['requested_formats', 'requested_subtitles'])
+    def sanitize_info(info_dict, remove_private_keys=False):
+        ''' Sanitize the infodict for converting to json '''
+        if info_dict is None:
+            return info_dict
+
+        if remove_private_keys:
+            reject = lambda k, v: (v is None
+                                   or k.startswith('__')
+                                   or k in ('requested_formats',
+                                            'requested_subtitles'))
+        else:
+            reject = lambda k, v: False
+
+        def filter_fn(obj):
+            if isinstance(obj, dict):
+                return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
+            elif isinstance(obj, (list, tuple, set, LazyList)):
+                return list(map(filter_fn, obj))
+            elif obj is None or any(isinstance(obj, c)
+                                    for c in (compat_integer_types,
+                                              (compat_str, float, bool))):
+                return obj
+            else:
+                return repr(obj)
+
+        return filter_fn(info_dict)
+
+    @classmethod
+    def filter_requested_info(cls, info_dict):
+        return cls.sanitize_info(info_dict, True)
 
     def post_process(self, filename, ie_info):
         """Run all the postprocessors on the given file."""

From 735e87adfc44b284dcdb4d9a0155ce0616e3af97 Mon Sep 17 00:00:00 2001
From: Gabriel Nagy <gabrielnagy@me.com>
Date: Thu, 13 Apr 2023 01:40:38 +0300
Subject: [PATCH 215/312] [core] Sanitize info dict before dumping JSON (fixes
 fe7e130)  (#32032)

* follow up to fe7e130 which didn't fix everything.

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/YoutubeDL.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2719d546f..117f1c513 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1777,7 +1777,7 @@ class YoutubeDL(object):
             self.to_stdout(formatSeconds(info_dict['duration']))
         print_mandatory('format')
         if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
 
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -2091,7 +2091,7 @@ class YoutubeDL(object):
                 raise
             else:
                 if self.params.get('dump_single_json', False):
-                    self.to_stdout(json.dumps(res))
+                    self.to_stdout(json.dumps(self.sanitize_info(res)))
 
         return self._download_retcode
 
@@ -2100,6 +2100,7 @@ class YoutubeDL(object):
                 [info_filename], mode='r',
                 openhook=fileinput.hook_encoded('utf-8'))) as f:
             # FileInput doesn't have a read method, we can't call json.load
+            # TODO: let's use io.open(), then
             info = self.filter_requested_info(json.loads('\n'.join(f)))
         try:
             self.process_ie_result(info, download=True)

From 2da3fa04a68ff0652f49d6874d82b7a0edb85ea3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Apr 2023 17:36:27 +0100
Subject: [PATCH 216/312] [YouTube] Simplify signature patterns

---
 youtube_dl/extractor/youtube.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ae3416b20..80fff7ada 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -19,6 +19,7 @@ from ..compat import (
     compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlparse,
+    compat_zip as zip,
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
@@ -1555,17 +1556,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
-             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 

From 26035bde46c0acc30dc053618451d9aeca4b7709 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Apr 2023 00:15:07 +0100
Subject: [PATCH 217/312] [DashSegmentsFD] Correctly detect errors when
 `fragment_retries` == 0

* use the success flag instead of the retry count
* establish the fragment_url outside the retry loop
* only report skipping a fragment once.
* resolves #32033
---
 youtube_dl/downloader/dash.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 67a8e173f..2800d4260 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -38,12 +38,13 @@ class DashSegmentsFD(FragmentFD):
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = frag_index == 1 or not skip_unavailable_fragments
+            fragment_url = fragment.get('url')
+            if not fragment_url:
+                assert fragment_base_url
+                fragment_url = urljoin(fragment_base_url, fragment['path'])
+            success = False
             for count in itertools.count():
                 try:
-                    fragment_url = fragment.get('url')
-                    if not fragment_url:
-                        assert fragment_base_url
-                        fragment_url = urljoin(fragment_base_url, fragment['path'])
                     success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
                         return False
@@ -63,14 +64,13 @@ class DashSegmentsFD(FragmentFD):
                     # itself since it has its own retry settings
                     if fatal:
                         raise
-                    self.report_skip_fragment(frag_index)
                 break
 
-            if count >= fragment_retries:
+            if not success:
                 if not fatal:
                     self.report_skip_fragment(frag_index)
                     continue
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
+                self.report_error('giving up after %s fragment retries' % count)
                 return False
 
         self._finish_frag_download(ctx)

From 211cbfd5d46025a8e4d8f9f3d424aaada4698974 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 21 Apr 2023 14:04:30 +0100
Subject: [PATCH 218/312] [jsinterp] Minimally handle arithmetic operator
 precedence

Resolves #32066
---
 test/test_jsinterp.py  | 11 +++++++++++
 youtube_dl/jsinterp.py | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 5d129433d..e121358d7 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -505,6 +505,17 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    def test_32066(self):
+        jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
+        self.assertEqual(jsi.call_function('x'), 70)
+
+    def test_unary_operators(self):
+        jsi = JSInterpreter('function f(){return 2  -  - - 2;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+        # fails
+        # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
+        # self.assertEqual(jsi.call_function('f'), 0)
+
     """ # fails so far
     def test_packed(self):
         jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index ab7d6f926..a06fc4ff5 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+from functools import update_wrapper
 import itertools
 import json
 import math
@@ -23,11 +24,23 @@ from .compat import (
 )
 
 
+def wraps_op(op):
+
+    def update_and_rename_wrapper(w):
+        f = update_wrapper(w, op)
+        # fn names are str in both Py 2/3
+        f.__name__ = str('JS_') + f.__name__
+        return f
+
+    return update_and_rename_wrapper
+
+
 def _js_bit_op(op):
 
     def zeroise(x):
         return 0 if x in (None, JS_Undefined) else x
 
+    @wraps_op(op)
     def wrapped(a, b):
         return op(zeroise(a), zeroise(b)) & 0xffffffff
 
@@ -36,6 +49,7 @@ def _js_bit_op(op):
 
 def _js_arith_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return float('nan')
@@ -66,6 +80,7 @@ def _js_exp(a, b):
 
 def _js_eq_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if set((a, b)) <= set((None, JS_Undefined)):
             return op(a, a)
@@ -76,6 +91,7 @@ def _js_eq_op(op):
 
 def _js_comp_op(op):
 
+    @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return False
@@ -356,6 +372,7 @@ class JSInterpreter(object):
             return right_val
 
         try:
+            # print('Eval:', opfunc.__name__, left_val, right_val)
             return opfunc(left_val, right_val)
         except Exception as e:
             raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
@@ -395,6 +412,7 @@ class JSInterpreter(object):
             raise self.Exception('Recursion limit reached')
         allow_recursion -= 1
 
+        # print('At: ' + stmt[:60])
         should_return = False
         # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
@@ -702,9 +720,24 @@ class JSInterpreter(object):
                 continue
 
             right_expr = separated.pop()
-            while op == '-' and len(separated) > 1 and not separated[-1].strip():
-                right_expr = '-' + right_expr
-                separated.pop()
+            # handle operators that are both unary and binary, minimal BODMAS
+            if op in ('+', '-'):
+                undone = 0
+                while len(separated) > 1 and not separated[-1].strip():
+                    undone += 1
+                    separated.pop()
+                if op == '-' and undone % 2 != 0:
+                    right_expr = op + right_expr
+                left_val = separated[-1]
+                for dm_op in ('*', '%', '/', '**'):
+                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
+                    if len(bodmas) > 1 and not bodmas[-1].strip():
+                        expr = op.join(separated) + op + right_expr
+                        right_expr = None
+                        break
+                if right_expr is None:
+                    continue
+
             left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
             return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
 
@@ -955,6 +988,7 @@ class JSInterpreter(object):
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]
         argnames = tuple(argnames)
+        # import pdb; pdb.set_trace()
 
         def resf(args, kwargs={}, allow_recursion=100):
             global_stack[0].update(

From 64d6dd64c8b7a35a87655d27fc83f2e98ef6ce13 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 23 Apr 2023 22:58:35 +0100
Subject: [PATCH 219/312] [YouTube] Support Releases tab

---
 youtube_dl/extractor/youtube.py | 114 +++++++++++++++++++-------------
 youtube_dl/utils.py             |   9 ++-
 2 files changed, 74 insertions(+), 49 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 80fff7ada..0411c49f1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,6 +31,7 @@ from ..utils import (
     extract_attributes,
     get_element_by_attribute,
     int_or_none,
+    join_nonempty,
     js_to_json,
     LazyList,
     merge_dicts,
@@ -45,6 +46,7 @@ from ..utils import (
     str_to_int,
     traverse_obj,
     try_get,
+    txt_or_none,
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
@@ -2608,6 +2610,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'uploader_id': '@lexwill718',
         },
         'playlist_mincount': 75,
+    }, {
+        # Releases tab
+        'url': 'https://www.youtube.com/@daftpunk/releases',
+        'info_dict': {
+            'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
+            'title': 'Daft Punk - Releases',
+            'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
+            'uploader_id': '@daftpunk',
+            'uploader': 'Daft Punk',
+        },
+        'playlist_mincount': 36,
     }, {
         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
@@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 continue
             return renderer
 
+    @staticmethod
+    def _get_text(r, k):
+        return traverse_obj(
+            r, (k, 'runs', 0, 'text'), (k, 'simpleText'),
+            expected_type=txt_or_none)
+
     def _grid_entries(self, grid_renderer):
         for item in grid_renderer['items']:
             if not isinstance(item, dict):
@@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             renderer = self._extract_grid_item_renderer(item)
             if not isinstance(renderer, dict):
                 continue
-            title = try_get(
-                renderer, (lambda x: x['title']['runs'][0]['text'],
-                           lambda x: x['title']['simpleText']), compat_str)
+            title = self._get_text(renderer, 'title')
             # playlist
             playlist_id = renderer.get('playlistId')
             if playlist_id:
@@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             # channel
             channel_id = renderer.get('channelId')
             if channel_id:
-                title = try_get(
-                    renderer, lambda x: x['title']['simpleText'], compat_str)
+                title = self._get_text(renderer, 'title')
                 yield self.url_result(
                     'https://www.youtube.com/channel/%s' % channel_id,
                     ie=YoutubeTabIE.ie_key(), video_title=title)
@@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(
-                content,
-                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
-                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
-                dict)
+            content = traverse_obj(
+                content, ('richItemRenderer', 'content'),
+                expected_type=dict) or {}
+            video_renderer = traverse_obj(
+                content, 'videoRenderer', 'reelItemRenderer',
+                expected_type=dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:
                     yield entry
+            # playlist
+            renderer = traverse_obj(
+                content, 'playlistRenderer', expected_type=dict) or {}
+            title = self._get_text(renderer, 'title')
+            playlist_id = renderer.get('playlistId')
+            if playlist_id:
+                yield self.url_result(
+                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
+                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+                    video_title=title)
 
     @staticmethod
     def _build_continuation_query(continuation, ctp=None):
@@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 return
             for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
                 yield entry
+
             continuation = self._extract_continuation(rich_grid_renderer)
 
         ytcfg = self._extract_ytcfg(item_id, webpage)
@@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         uploader['channel'] = uploader['uploader']
         return uploader
 
-    @staticmethod
-    def _extract_alert(data):
+    @classmethod
+    def _extract_alert(cls, data):
         alerts = []
-        for alert in try_get(data, lambda x: x['alerts'], list) or []:
-            if not isinstance(alert, dict):
-                continue
-            alert_text = try_get(
-                alert, lambda x: x['alertRenderer']['text'], dict)
+        for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
+            alert_text = traverse_obj(
+                alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
             if not alert_text:
                 continue
-            text = try_get(
-                alert_text,
-                (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
-                compat_str)
+            text = cls._get_text(alert_text, 'text')
             if text:
                 alerts.append(text)
         return '\n'.join(alerts)
 
     def _extract_from_tabs(self, item_id, webpage, data, tabs):
         selected_tab = self._extract_selected_tab(tabs)
-        renderer = try_get(
-            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
+        renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
+                                expected_type=dict) or {}
         playlist_id = item_id
         title = description = None
         if renderer:
-            channel_title = renderer.get('title') or item_id
-            tab_title = selected_tab.get('title')
-            title = channel_title or item_id
-            if tab_title:
-                title += ' - %s' % tab_title
-            if selected_tab.get('expandedText'):
-                title += ' - %s' % selected_tab['expandedText']
-            description = renderer.get('description')
-            playlist_id = renderer.get('externalId')
+            channel_title = txt_or_none(renderer.get('title')) or item_id
+            tab_title = txt_or_none(selected_tab.get('title'))
+            title = join_nonempty(
+                channel_title or item_id, tab_title,
+                txt_or_none(selected_tab.get('expandedText')),
+                delim=' - ')
+            description = txt_or_none(renderer.get('description'))
+            playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
         else:
-            renderer = try_get(
-                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
-            if renderer:
-                title = renderer.get('title')
-            else:
-                renderer = try_get(
-                    data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
-                if renderer:
-                    title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
+            renderer = traverse_obj(data,
+                                    ('metadata', 'playlistMetadataRenderer'),
+                                    ('header', 'hashtagHeaderRenderer'),
+                                    expected_type=dict) or {}
+            title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
+                                 expected_type=txt_or_none)
         playlist = self.playlist_result(
             self._entries(selected_tab, item_id, webpage),
             playlist_id=playlist_id, playlist_title=title,
@@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         return merge_dicts(playlist, self._extract_uploader(renderer, data))
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
-        title = playlist.get('title') or try_get(
-            data, lambda x: x['titleText']['simpleText'], compat_str)
-        playlist_id = playlist.get('playlistId') or item_id
+        title = traverse_obj((playlist, data),
+                             (0, 'title'), (1, 'titleText', 'simpleText'),
+                             expected_type=txt_or_none)
+        playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
         # Inline playlist rendition continuation does not always work
         # at Youtube side, so delegating regular tab-based playlist URL
         # processing whenever possible.
-        playlist_url = urljoin(url, try_get(
-            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
-            compat_str))
+        playlist_url = urljoin(url, traverse_obj(
+            playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
+            expected_type=url_or_none))
         if playlist_url and playlist_url != url:
             return self.url_result(
                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d80ceb007..65ddb3b0f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3753,6 +3753,11 @@ def strip_or_none(v, default=None):
     return v.strip() if isinstance(v, compat_str) else default
 
 
+def txt_or_none(v, default=None):
+    """ Combine str/strip_or_none, disallow blank value (for traverse_obj) """
+    return default if v is None else (compat_str(v).strip() or default)
+
+
 def url_or_none(url):
     if not url or not isinstance(url, compat_str):
         return None
@@ -4096,8 +4101,8 @@ def escape_url(url):
     ).geturl()
 
 
-def parse_qs(url):
-    return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
+def parse_qs(url, **kwargs):
+    return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs)
 
 
 def read_batch_urls(batch_fd):

From 11cc3f3ad03a88d6cb1eab18a8e5dd6bf148ac54 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 20:53:07 +0100
Subject: [PATCH 220/312] [utils] Fix `compiled_regex_type` in 249f2b6

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 65ddb3b0f..584581b6a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -56,6 +56,7 @@ from .compat import (
     compat_kwargs,
     compat_os_name,
     compat_re_Match,
+    compat_re_Pattern,
     compat_shlex_quote,
     compat_str,
     compat_struct_pack,
@@ -86,7 +87,7 @@ def register_socks_protocols():
 
 
 # Unfavoured alias
-compiled_regex_type = compat_re_Match
+compiled_regex_type = compat_re_Pattern
 
 
 def random_user_agent():

From a85a875fef2e9b097c3f6f93f1d0cead06f84e43 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 20:59:30 +0100
Subject: [PATCH 221/312] [jsinterp] Handle NaN in bitwise operators * also add
 _NaN * also pull function naming from yt-dlp

---
 test/test_jsinterp.py  | 11 +++++++++++
 youtube_dl/jsinterp.py | 41 ++++++++++++++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index e121358d7..a8f312fde 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -18,6 +18,7 @@ class TestJSInterpreter(unittest.TestCase):
     def test_basic(self):
         jsi = JSInterpreter('function x(){;}')
         self.assertEqual(jsi.call_function('x'), None)
+        self.assertEqual(repr(jsi.extract_function('x')), 'F<x>')
 
         jsi = JSInterpreter('function x3(){return 42;}')
         self.assertEqual(jsi.call_function('x3'), 42)
@@ -505,6 +506,16 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 1236566549 << 5}')
         self.assertEqual(jsi.call_function('x'), 915423904)
 
+    def test_bitwise_operators_madness(self):
+        jsi = JSInterpreter('function x(){return null << 5}')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+        jsi = JSInterpreter('function x(){return undefined >> 5}')
+        self.assertEqual(jsi.call_function('x'), 0)
+
+        jsi = JSInterpreter('function x(){return 42 << NaN}')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_32066(self):
         jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
         self.assertEqual(jsi.call_function('x'), 70)
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a06fc4ff5..bb406647a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
-from functools import update_wrapper
 import itertools
 import json
 import math
 import operator
 import re
 
+from functools import update_wrapper
+
 from .utils import (
     error_to_compat_str,
     ExtractorError,
@@ -24,6 +25,22 @@ from .compat import (
 )
 
 
+# name JS functions
+class function_with_repr(object):
+    # from yt_dlp/utils.py, but in this module
+    # repr_ is always set
+    def __init__(self, func, repr_):
+        update_wrapper(self, func)
+        self.func, self.__repr = func, repr_
+
+    def __call__(self, *args, **kwargs):
+        return self.func(*args, **kwargs)
+
+    def __repr__(self):
+        return self.__repr
+
+
+# name JS operators
 def wraps_op(op):
 
     def update_and_rename_wrapper(w):
@@ -35,10 +52,13 @@ def wraps_op(op):
     return update_and_rename_wrapper
 
 
+_NaN = float('nan')
+
+
 def _js_bit_op(op):
 
     def zeroise(x):
-        return 0 if x in (None, JS_Undefined) else x
+        return 0 if x in (None, JS_Undefined, _NaN) else x
 
     @wraps_op(op)
     def wrapped(a, b):
@@ -52,7 +72,7 @@ def _js_arith_op(op):
     @wraps_op(op)
     def wrapped(a, b):
         if JS_Undefined in (a, b):
-            return float('nan')
+            return _NaN
         return op(a or 0, b or 0)
 
     return wrapped
@@ -60,13 +80,13 @@ def _js_arith_op(op):
 
 def _js_div(a, b):
     if JS_Undefined in (a, b) or not (a and b):
-        return float('nan')
+        return _NaN
     return operator.truediv(a or 0, b) if b else float('inf')
 
 
 def _js_mod(a, b):
     if JS_Undefined in (a, b) or not b:
-        return float('nan')
+        return _NaN
     return (a or 0) % b
 
 
@@ -74,7 +94,7 @@ def _js_exp(a, b):
     if not b:
         return 1  # even 0 ** 0 !!
     elif JS_Undefined in (a, b):
-        return float('nan')
+        return _NaN
     return (a or 0) ** b
 
 
@@ -285,6 +305,8 @@ class JSInterpreter(object):
     def _named_object(self, namespace, obj):
         self.__named_object_counter += 1
         name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
+        if callable(obj) and not isinstance(obj, function_with_repr):
+            obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, ))
         namespace[name] = obj
         return name
 
@@ -693,7 +715,7 @@ class JSInterpreter(object):
         elif expr == 'undefined':
             return JS_Undefined, should_return
         elif expr == 'NaN':
-            return float('NaN'), should_return
+            return _NaN, should_return
 
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -953,7 +975,9 @@ class JSInterpreter(object):
         return self.build_arglist(func_m.group('args')), code
 
     def extract_function(self, funcname):
-        return self.extract_function_from_code(*self.extract_function_code(funcname))
+        return function_with_repr(
+            self.extract_function_from_code(*self.extract_function_code(funcname)),
+            'F<%s>' % (funcname, ))
 
     def extract_function_from_code(self, argnames, code, *global_stack):
         local_vars = {}
@@ -988,7 +1012,6 @@ class JSInterpreter(object):
     def build_function(self, argnames, code, *global_stack):
         global_stack = list(global_stack) or [{}]
         argnames = tuple(argnames)
-        # import pdb; pdb.set_trace()
 
         def resf(args, kwargs={}, allow_recursion=100):
             global_stack[0].update(

From 6ed34338285f722d0da312ce0af3a15a077a3e2a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 21:02:01 +0100
Subject: [PATCH 222/312] [jsinterp] Add short-cut evaluation for common
 expression * special handling for (d%e.length+e.length)%e.length speeds up
 ~6%

---
 youtube_dl/jsinterp.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index bb406647a..f837865c4 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -502,8 +502,15 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('('):
-            inner, outer = self._separate_at_paren(expr)
-            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
+
+            m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
+            if m:
+                # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
+                outer = None
+                inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars)
+            else:
+                inner, outer = self._separate_at_paren(expr)
+                inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
             else:
@@ -957,6 +964,17 @@ class JSInterpreter(object):
 
         return obj
 
+    @staticmethod
+    def _offset_e_by_d(d, e, local_vars):
+        """ Short-cut eval: (d%e.length+e.length)%e.length """
+        try:
+            d = local_vars[d]
+            e = local_vars[e]
+            e = len(e)
+            return _js_mod(_js_mod(d, e) + e, e), False
+        except Exception:
+            return None, True
+
     def extract_function_code(self, funcname):
         """ @returns argnames, code """
         func_m = re.search(

From d1c6c5c4d618fa950813c0c71aede34a5ac851e9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 11 May 2023 21:17:31 +0100
Subject: [PATCH 223/312] [core] Improve platform debug log, based on yt-dlp

---
 youtube_dl/YoutubeDL.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 117f1c513..212c04298 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,6 +25,7 @@ import tokenize
 import traceback
 import random
 
+from ssl import OPENSSL_VERSION
 from string import ascii_letters
 
 from .compat import (
@@ -66,6 +67,7 @@ from .utils import (
     HEADRequest,
     int_or_none,
     ISO3166Utils,
+    join_nonempty,
     locked_file,
     LazyList,
     make_HTTPS_handler,
@@ -2395,9 +2397,20 @@ class YoutubeDL(object):
                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
             return impl_name
 
-        self._write_string('[debug] Python version %s (%s) - %s\n' % (
-            platform.python_version(), python_implementation(),
-            platform_name()))
+        def libc_ver():
+            try:
+                return platform.libc_ver()
+            except OSError:  # We may not have access to the executable
+                return []
+
+        self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % (
+            platform.python_version(),
+            python_implementation(),
+            platform.architecture()[0],
+            platform_name(),
+            OPENSSL_VERSION,
+            ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'),
+        ))
 
         exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()

From d89c2137ba4c1def185358a9ff48642e05ac65a2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 May 2023 13:09:18 +0100
Subject: [PATCH 224/312] [jsinterp] Small updates for a85a875 * update
 signature tests * clarify NaN handling

---
 test/test_jsinterp.py          |  3 +++
 test/test_youtube_signature.py |  8 ++++++++
 youtube_dl/jsinterp.py         | 12 +++++-------
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index a8f312fde..1cc148b15 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -516,6 +516,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x(){return 42 << NaN}')
         self.assertEqual(jsi.call_function('x'), 42)
 
+        jsi = JSInterpreter('function x(){return 42 << Infinity}')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_32066(self):
         jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
         self.assertEqual(jsi.call_function('x'), 70)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index decf7ee38..d41d708a0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -143,6 +143,14 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
         'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
     ),
+    (
+        'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
+        'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
+    ),
+    (
+        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
+        'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index f837865c4..dc580943e 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
 
 import itertools
 import json
-import math
 import operator
 import re
 
@@ -52,6 +51,10 @@ def wraps_op(op):
     return update_and_rename_wrapper
 
 
+# NB In principle NaN cannot be checked by membership.
+# Here all NaN values are actually this one, so _NaN is _NaN,
+# although _NaN != _NaN.
+
 _NaN = float('nan')
 
 
@@ -126,13 +129,8 @@ def _js_comp_op(op):
 
 def _js_ternary(cndn, if_true=True, if_false=False):
     """Simulate JS's ternary operator (cndn?if_true:if_false)"""
-    if cndn in (False, None, 0, '', JS_Undefined):
+    if cndn in (False, None, 0, '', JS_Undefined, _NaN):
         return if_false
-    try:
-        if math.isnan(cndn):  # NB: NaN cannot be checked by membership
-            return if_false
-    except TypeError:
-        pass
     return if_true
 
 

From 1f7c6f8b2ba5bedc9b4da279659688fbbf06a059 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 19 May 2023 13:12:59 +0100
Subject: [PATCH 225/312] [core] Further improve platform debug log * see
 d1c6c5c

---
 youtube_dl/YoutubeDL.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 212c04298..1b3ef94b4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -102,6 +102,7 @@ from .utils import (
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
+    ytdl_is_updateable,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -2373,9 +2374,11 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
+
+        writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else ''))
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+            writeln_debug('Lazy loading extractors enabled')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -2384,7 +2387,7 @@ class YoutubeDL(object):
             out, err = process_communicate_or_kill(sp)
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
-                self._write_string('[debug] Git HEAD: ' + out + '\n')
+                writeln_debug('Git HEAD: ', out)
         except Exception:
             try:
                 sys.exc_clear()
@@ -2403,13 +2406,15 @@ class YoutubeDL(object):
             except OSError:  # We may not have access to the executable
                 return []
 
-        self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % (
+        libc = join_nonempty(*libc_ver(), delim=' ')
+        writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
             platform.python_version(),
             python_implementation(),
+            platform.machine(),
             platform.architecture()[0],
             platform_name(),
             OPENSSL_VERSION,
-            ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'),
+            (' - %s' % (libc, )) if libc else ''
         ))
 
         exe_versions = FFmpegPostProcessor.get_versions(self)
@@ -2422,17 +2427,17 @@ class YoutubeDL(object):
         )
         if not exe_str:
             exe_str = 'none'
-        self._write_string('[debug] exe versions: %s\n' % exe_str)
+        writeln_debug('exe versions: %s' % (exe_str, ))
 
         proxy_map = {}
         for handler in self._opener.handlers:
             if hasattr(handler, 'proxies'):
                 proxy_map.update(handler.proxies)
-        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        writeln_debug('Proxy map: ', compat_str(proxy_map))
 
         if self.params.get('call_home', False):
             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
-            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+            writeln_debug('Public IP address: %s' % (ipaddr, ))
             latest_version = self.urlopen(
                 'https://yt-dl.org/latest/version').read().decode('utf-8')
             if version_tuple(latest_version) > version_tuple(__version__):

From ee731f3d00064f446faa9ffb4c21ce4ca388bf5d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 23 May 2023 16:19:55 +0100
Subject: [PATCH 226/312] [ITV] Fix UA capitalisation in 384f632

---
 youtube_dl/extractor/itv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index 7026139ea..c64af3be6 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -59,7 +59,7 @@ class ITVBaseIE(InfoExtractor):
 
     @staticmethod
     def _vanilla_ua_header():
-        return {'User-agent': 'Mozilla/5.0'}
+        return {'User-Agent': 'Mozilla/5.0'}
 
     def _download_webpage_handle(self, url, video_id, *args, **kwargs):
         # specialised to (a) use vanilla UA (b) detect geo-block
@@ -69,7 +69,7 @@ class ITVBaseIE(InfoExtractor):
                 'user_agent' not in params
                 and not any(re.match(r'(?i)user-agent\s*:', h)
                             for h in (params.get('headers') or []))
-                and 'User-agent' not in (kwargs.get('headers') or {})):
+                and 'User-Agent' not in (kwargs.get('headers') or {})):
 
             kwargs.setdefault('headers', {})
             kwargs['headers'] = self._vanilla_ua_header()

From 2389c7cbd30813435c50848a9b276bcfe2a810db Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 23 May 2023 17:11:22 +0100
Subject: [PATCH 227/312] [compat] Fix casefold import __all__ syntax in
 a19855f

---
 youtube_dl/casefold.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
index 748c2d491..ad9c66f8e 100644
--- a/youtube_dl/casefold.py
+++ b/youtube_dl/casefold.py
@@ -1663,5 +1663,5 @@ def casefold(s):
 
 
 __all__ = [
-    casefold
+    'casefold',
 ]

From b8a86dcf1aa837577178ae25357d8241ab4ba6c1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 26 May 2023 20:25:25 +0100
Subject: [PATCH 228/312] [core] Revise 1f7c6f8 to help downstream merger
 (possibly)

---
 youtube_dl/YoutubeDL.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1b3ef94b4..98b878fc1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2374,11 +2374,10 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
-
-        writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else ''))
+        self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n'))
         if _LAZY_LOADER:
-            writeln_debug('Lazy loading extractors enabled')
+            self._write_string('[debug] Lazy loading extractors enabled\n')
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))  # moved down for easier merge
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],

From a2534f7b888416e872d5afd1862eb3e30fc69fc7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 11 Jun 2023 13:33:50 +0100
Subject: [PATCH 229/312] [jsinterp] Fix div bug breaking player 8c7583ff

Thx bashonly: https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1585639223
Fixes #32292
---
 test/test_jsinterp.py          | 49 ++++++++++++++++++++++++++++++++++
 test/test_youtube_signature.py |  4 +++
 youtube_dl/jsinterp.py         |  2 +-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 1cc148b15..ecd6ab3c9 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -33,6 +33,55 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x4(a){return 2*a+1;}')
         self.assertEqual(jsi.call_function('x4', 3), 7)
 
+    def test_add(self):
+        jsi = JSInterpreter('function f(){return 42 + 7;}')
+        self.assertEqual(jsi.call_function('f'), 49)
+        jsi = JSInterpreter('function f(){return 42 + undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 + null;}')
+        self.assertEqual(jsi.call_function('f'), 42)
+
+    def test_sub(self):
+        jsi = JSInterpreter('function f(){return 42 - 7;}')
+        self.assertEqual(jsi.call_function('f'), 35)
+        jsi = JSInterpreter('function f(){return 42 - undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 - null;}')
+        self.assertEqual(jsi.call_function('f'), 42)
+
+    def test_mul(self):
+        jsi = JSInterpreter('function f(){return 42 * 7;}')
+        self.assertEqual(jsi.call_function('f'), 294)
+        jsi = JSInterpreter('function f(){return 42 * undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 * null;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
+    def test_div(self):
+        jsi = JSInterpreter('function f(a, b){return a / b;}')
+        self.assertTrue(math.isnan(jsi.call_function('f', 0, 0)))
+        self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1)))
+        self.assertTrue(math.isinf(jsi.call_function('f', 2, 0)))
+        self.assertEqual(jsi.call_function('f', 0, 3), 0)
+
+    def test_mod(self):
+        jsi = JSInterpreter('function f(){return 42 % 7;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+        jsi = JSInterpreter('function f(){return 42 % 0;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 % undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+
+    def test_exp(self):
+        jsi = JSInterpreter('function f(){return 42 ** 2;}')
+        self.assertEqual(jsi.call_function('f'), 1764)
+        jsi = JSInterpreter('function f(){return 42 ** undefined;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+        jsi = JSInterpreter('function f(){return 42 ** null;}')
+        self.assertEqual(jsi.call_function('f'), 1)
+        jsi = JSInterpreter('function f(){return undefined ** 42;}')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
+
     def test_empty_return(self):
         jsi = JSInterpreter('function f(){return; y()}')
         self.assertEqual(jsi.call_function('f'), None)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index d41d708a0..e7bce9d68 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -151,6 +151,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
         'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
     ),
+    (
+        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
+        'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index dc580943e..9d4a5bc57 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -82,7 +82,7 @@ def _js_arith_op(op):
 
 
 def _js_div(a, b):
-    if JS_Undefined in (a, b) or not (a and b):
+    if JS_Undefined in (a, b) or not (a or b):
         return _NaN
     return operator.truediv(a or 0, b) if b else float('inf')
 

From ff75c300f52321dc7322e28d1df153cf0ea65a6d Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:34:11 +0100
Subject: [PATCH 230/312] [jsinterp] Fix test for failed match in
 extract_object()

---
 youtube_dl/jsinterp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 9d4a5bc57..c18c4fef1 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -985,9 +985,9 @@ class JSInterpreter(object):
                 \((?P<args>[^)]*)\)\s*
                 (?P<code>{.+})''' % {'name': re.escape(funcname)},
             self.code)
-        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         if func_m is None:
             raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
+        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         return self.build_arglist(func_m.group('args')), code
 
     def extract_function(self, funcname):

From d6433cbb2c4440056a38846e35bb5a3efa9bcac2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:43:10 +0100
Subject: [PATCH 231/312] [jsinterp] Don't find unrelated objects

---
 youtube_dl/jsinterp.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index c18c4fef1..00f219440 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -941,15 +941,15 @@ class JSInterpreter(object):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
         obj_m = re.search(
-            r'''(?x)
-                (?<!this\.)%s\s*=\s*{\s*
-                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
-                }\s*;
-            ''' % (re.escape(objname), _FUNC_NAME_RE),
+            r'''(?xs)
+                (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
+                    (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
+                }}\s*);
+            '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
             self.code)
-        if not obj_m:
+        fields = obj_m and obj_m.group('fields')
+        if fields is None:
             raise self.Exception('Could not find object ' + objname)
-        fields = obj_m.group('fields')
         # Currently, it only supports function definitions
         fields_m = re.finditer(
             r'''(?x)

From ae8ba2c31977b68b75221f80c488c0b12385269c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 17 Jun 2023 15:36:39 +0100
Subject: [PATCH 232/312] [YouTube] Fix `KeyError QV` in signature extraction
 failed * temporarily force missing global definition into sig JS * improve
 test: thanks
 https://github.com/yt-dlp/yt-dlp/issues/7327#issuecomment-1595274615 *
 resolves #32314

---
 test/test_youtube_signature.py  | 7 ++++++-
 youtube_dl/extractor/youtube.py | 6 +++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index e7bce9d68..4ba586e53 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -63,6 +63,11 @@ _SIG_TESTS = [
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
+    ),
+    (
+        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
     )
 ]
 
@@ -231,7 +236,7 @@ def n_sig(jscode, sig_input):
 
 
 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+    'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
 for test_spec in _SIG_TESTS:
     make_sig_test(*test_spec)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0411c49f1..0bbce71a3 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1569,8 +1569,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
-        jsi = JSInterpreter(jscode)
+        # temporary (please) hack for player 6ed0d907 #32314
+        ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
+        jsi = JSInterpreter(ah + jscode)
+
         initial_function = jsi.extract_function(funcname)
+
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url):

From 07af47960f3bb262ead02490ce65c8c45c01741e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 18 Jun 2023 00:52:18 +0100
Subject: [PATCH 233/312] [YouTube] Improve fix for ae8ba2c Thx:
 https://github.com/yt-dlp/yt-dlp/commit/01aba25

---
 youtube_dl/extractor/youtube.py |  4 +---
 youtube_dl/jsinterp.py          | 21 ++++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0bbce71a3..1855fca7f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1569,9 +1569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
-        # temporary (please) hack for player 6ed0d907 #32314
-        ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
-        jsi = JSInterpreter(ah + jscode)
+        jsi = JSInterpreter(jscode)
 
         initial_function = jsi.extract_function(funcname)
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 00f219440..1ba9c3d67 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -940,15 +940,18 @@ class JSInterpreter(object):
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
-        obj_m = re.search(
-            r'''(?xs)
-                (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
-                    (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
-                }}\s*);
-            '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
-            self.code)
-        fields = obj_m and obj_m.group('fields')
-        if fields is None:
+        fields = None
+        for obj_m in re.finditer(
+                r'''(?xs)
+                    {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
+                        (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
+                    }}\s*;
+                '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
+                self.code):
+            fields = obj_m.group('fields')
+            if fields:
+                break
+        else:
             raise self.Exception('Could not find object ' + objname)
         # Currently, it only supports function definitions
         fields_m = re.finditer(

From 9112e668a5ea6376017718db9ff13b369d53ad7a Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 22 Jun 2023 13:23:31 +0530
Subject: [PATCH 234/312] [YouTube] Improve nsig function name extraction

Fixes player b7910ca8, using `,` vs `;`
See https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1602231170

Co-authored-by: dirkf
---
 test/test_youtube_signature.py  | 11 +++--------
 youtube_dl/extractor/youtube.py | 19 +++++++++++++------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4ba586e53..5dcabaf95 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -63,11 +63,6 @@ _SIG_TESTS = [
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
-    ),
-    (
-        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
-        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
-        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
     )
 ]
 
@@ -157,8 +152,8 @@ _NSIG_TESTS = [
         'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
     ),
     (
-        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
-        'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A',
+        'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
+        '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
     ),
 ]
 
@@ -236,7 +231,7 @@ def n_sig(jscode, sig_input):
 
 
 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
+    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
 for test_spec in _SIG_TESTS:
     make_sig_test(*test_spec)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1855fca7f..24e2efbd9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1623,15 +1623,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
         if not idx:
             return nfunc
+
+        VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
+        note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
+
+        def search_function_code(needle, group):
+            return self._search_regex(
+                VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
+                note.format(group), group=group)
+
         if int_or_none(idx) == 0:
-            real_nfunc = self._search_regex(
-                r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
-                'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals()))
+            real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
             if real_nfunc:
                 return real_nfunc
-        return self._parse_json(self._search_regex(
-            r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
-            'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
+        return self._parse_json(
+            search_function_code('.+?', group='name'),
+            nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)

From ebdc82c58684b4e202fabc046f9a40fc73cccde5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 22 Jun 2023 17:24:48 +0100
Subject: [PATCH 235/312] [workflows/ci.yml] Replace actions/setup-python for
 legacy Pythons

Thanks MatteoH2O1999: https://github.com/MatteoH2O1999/setup-python
---
 .github/workflows/ci.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 51abdce1d..9be4eaa89 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,10 +38,12 @@ jobs:
     steps:
     - uses: actions/checkout@v3
     - name: Set up supported Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }}
+      # wrap broken actions/setup-python@v4
+      uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
+        cache-build: true
+        allow-build: info
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
       uses: actions/setup-java@v2

From fa7f0effbe4e14fcf70e1dc4496371c9862b64b9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 22 Jun 2023 23:10:04 +0100
Subject: [PATCH 236/312] [YouTube] Avoid crash in author extraction

---
 youtube_dl/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 24e2efbd9..9c419c002 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -448,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             extract_attributes(self._search_regex(
                 r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
                 % re.escape(var_name),
-                get_element_by_attribute('itemprop', 'author', webpage) or '',
+                get_element_by_attribute('itemprop', 'author', webpage or '') or '',
                 'author link', default='')),
             paths[var_name][0])
 

From 58fc5bde47215d9e7c60647dd21202a254b3b066 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 23 Jun 2023 00:15:06 +0100
Subject: [PATCH 237/312] [workflows/ci.yml] Restore test support for Py 3.3,
 3.4, and add 2.6

---
 .github/workflows/ci.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 9be4eaa89..4008cc190 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,9 +8,7 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        # TODO: python 2.6
-        # TODO: restore support for 3.3, 3.4
-        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-impl: [cpython]
         ytdl-test-set: [core, download]
         run-tests-ext: [sh]

From 2500300c2a5986ace34390aa473a8bd51f83622c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 29 Jun 2023 15:27:12 +0100
Subject: [PATCH 238/312] [workflows/ci.yml] Restore test support for Py 3.2

---
 .github/workflows/ci.yml           | 319 +++++++++++++++++++++++++++--
 devscripts/make_lazy_extractors.py |   4 +
 test/test_execution.py             |   8 +-
 test/test_unicode_literals.py      |   1 +
 youtube_dl/__init__.py             |   8 +-
 youtube_dl/compat.py               |  18 +-
 6 files changed, 328 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4008cc190..8d8e654fb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,73 +1,349 @@
 name: CI
-on: [push, pull_request]
+
+env:
+  # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099)
+  # or switching to fork of https://github.com/mdmintz/pynose
+  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
+  main-cpython-versions: 2.7, 3.2, 3.5, 3.9
+  pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
+  cpython-versions: all
+  # test-set: both
+  test-set: core
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      cpython-versions:
+        type: choice
+        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9)
+        options:
+          - all
+          - main
+        required: true
+        default: main
+      test-set:
+        type: choice
+        description: core, download
+        options:
+          - both
+          - core
+          - download
+        required: true
+        default: core
+
+permissions:
+  contents: read
+
 jobs:
+  select:
+    name: Select tests from inputs
+    runs-on: ubuntu-latest
+    outputs:
+      cpython-versions: ${{ steps.run.outputs.cpython-versions }}
+      test-set: ${{ steps.run.outputs.test-set }}
+      own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
+    steps:
+    - id: run
+      run: |
+        # Make a JSON Array from comma/space-separated string (no extra escaping)
+        json_list() { \
+          ret=""; IFS="${IFS},"; set -- $*; \
+          for a in "$@"; do \
+            ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \
+          done; \
+          printf '[%s]' "$ret"; }
+        tests="${{ inputs.test-set || env.test-set }}"
+        [ $tests = both ] && tests="core download"
+        printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT"
+        versions="${{ inputs.cpython-versions || env.cpython-versions }}"
+        if [ "$versions" = all ]; then \
+          versions="${{ env.all-cpython-versions }}"; else \
+          versions="${{ env.main-cpython-versions }}"; \
+        fi
+        printf 'cpython-versions=%s\n' \
+          "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT"
+        # versions with a special get-pip.py in a per-version subdirectory
+        printf 'own-pip-versions=%s\n' \
+          "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
+
   tests:
-    name: Tests
+    name: Run tests
+    needs: select
+    permissions:
+      contents: read
+      packages: write
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+        # outside steps, use github.env...., not env....
+        python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
         python-impl: [cpython]
-        ytdl-test-set: [core, download]
+        ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
         run-tests-ext: [sh]
         include:
-        # python 3.2 is only available on windows via setup-python
         - os: windows-2019
           python-version: 3.2
           python-impl: cpython
-          ytdl-test-set: core
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: bat
         - os: windows-2019
           python-version: 3.2
           python-impl: cpython
-          ytdl-test-set: download
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: bat
         # jython
         - os: ubuntu-20.04
           python-impl: jython
-          ytdl-test-set: core
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: sh
         - os: ubuntu-20.04
           python-impl: jython
-          ytdl-test-set: download
+          ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
     steps:
-    - uses: actions/checkout@v3
+    - name: Checkout
+      uses: actions/checkout@v3
+    #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
+      id: setup-python
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}}
       # wrap broken actions/setup-python@v4
       uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}
         cache-build: true
         allow-build: info
+    - name: Locate supported Python ${{ matrix.python-version }}
+      if: ${{ env.pythonLocation }}
+      shell: bash
+      run: |
+        echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV"
+        export expected="${{ steps.setup-python.outputs.python-path }}"
+        dirname() { printf '%s\n' \
+            'import os, sys' \
+            'print(os.path.dirname(sys.argv[1]))' \
+            | ${expected} - "$1"; }
+        expd="$(dirname "$expected")"
+        export python="$(command -v python)"
+        [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV"
+        [ -x "$python" ] || printf '%s\n' \
+            'import os' \
+            'exp = os.environ["expected"]' \
+            'python = os.environ["python"]' \
+            'exps = os.path.split(exp)' \
+            'if python and (os.path.dirname(python) == exp[0]):' \
+            '    exit(0)' \
+            'exps[1] = "python" + os.path.splitext(exps[1])[1]' \
+            'python = os.path.join(*exps)' \
+            'try:' \
+            '    os.symlink(exp, python)' \
+            'except AttributeError:' \
+            '    os.rename(exp, python)' \
+            | ${expected} -
+        printf '%s\n' \
+            'import sys' \
+            'print(sys.path)' \
+            | ${expected} -
+    #-------- Python 2.7 --
+    - name: Set up Python 2.7
+      if: ${{ matrix.python-version == '2.7' }}
+      # install 2.7
+      run: |
+        sudo apt-get install -y python2 python-is-python2
+        echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
+    #-------- Python 2.6 --
+    - name: Set up Python 2.6 environment
+      if: ${{ matrix.python-version == '2.6' }}
+      run: |
+        openssl_name=openssl-1.0.2u
+        echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
+        openssl_dir=$HOME/.local/opt/$openssl_name
+        echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV"
+        PYENV_ROOT=$HOME/.local/share/pyenv
+        echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        sudo apt-get install -y openssl ca-certificates
+    - name: Cache Python 2.6
+      id: cache26
+      if: ${{ matrix.python-version == '2.6' }}
+      uses: actions/cache@v3
+      with:
+        key: python-2.6.9
+        path: |
+          ${{ env.openssl_dir }}
+          ${{ env.PYENV_ROOT }}
+    - name: Build and set up Python 2.6
+      if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
+      # dl and build locally
+      run: |
+        # Install build environment
+        sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
+                      libncursesw5-dev libreadline-dev libsqlite3-dev   \
+                      libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+        # Download and install OpenSSL 1.0.2, back in time
+        openssl_name=${{ env.openssl_name }}
+        openssl_targz=${openssl_name}.tar.gz
+        openssl_dir=${{ env.openssl_dir }}
+        openssl_inc=$openssl_dir/include
+        openssl_lib=$openssl_dir/lib
+        openssl_ssl=$openssl_dir/ssl
+        curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz
+        tar -xf $openssl_targz
+        ( cd $openssl_name; \
+          ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \
+            --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \
+          make && \
+          make install )
+        rm -rf $openssl_name
+        rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
+
+        # Download PyEnv from its GitHub repository.
+        export PYENV_ROOT=${{ env.PYENV_ROOT }}
+        export PATH=$PYENV_ROOT/bin:$PATH
+        git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT
+        eval "$(pyenv init --path)"
+
+        # Prevent pyenv build trying (and failing) to update pip
+        export GET_PIP=get-pip-2.6.py
+        echo 'import sys; sys.exit(0)' > ${GET_PIP}
+        GET_PIP=$(realpath $GET_PIP)
+
+        # Build and install Python
+        export CFLAGS="-I$openssl_inc"
+        export LDFLAGS="-L$openssl_lib"
+        export LD_LIBRARY_PATH="$openssl_lib"
+        pyenv install 2.6.9
+        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+    - name: Set up cached Python 2.6
+      if: ${{ steps.cache26.outputs.cache-hit }}
+      run: |
+        export PYENV_ROOT
+        export PATH=$PYENV_ROOT/bin:$PATH
+        eval "$(pyenv init --path)"
+        pyenv local 2.6.9
+        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
+        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+    #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
       uses: actions/setup-java@v2
       with:
         java-version: 8
         distribution: 'zulu'
-    - name: Install Jython
+    - name: Setup Jython environment
       if: ${{ matrix.python-impl == 'jython' }}
       run: |
-        wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
-        java -jar jython-installer.jar -s -d "$HOME/jython"
-        echo "$HOME/jython/bin" >> $GITHUB_PATH
-    - name: Install nose
-      if: ${{ matrix.python-impl != 'jython' }}
-      run: pip install nose
+        echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
+    - name: Cache Jython
+      id: cachejy
+      if: ${{ matrix.python-impl == 'jython' }}
+      uses: actions/cache@v3
+      with:
+        # 2.7.3 now available, may solve SNI issue
+        key: jython-2.7.1
+        path: |
+          ${{ env.JYTHON_ROOT }}
+    - name: Install Jython
+      if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      run: |
+        JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+        curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
+        java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
+        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    - name: Set up cached Jython
+      if: ${{ steps.cachejy.outputs.cache-hit }}
+      run: |
+        JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    #-------- pip ---------
+    - name: Set up supported Python ${{ matrix.python-version }} pip
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      # This step may run in either Linux or Windows
+      shell: bash
+      run: |
+        echo "$PATH"
+        echo "$PYTHONHOME"
+        # curl is available on both Windows and Linux, -L follows redirects, -O gets name
+        python -m ensurepip || python -m pip --version || { \
+          get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
+          curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
+          python get-pip.py; }
+    - name: Set up other Python ${{ matrix.python-version }} pip
+      if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+      shell: bash
+      run: |
+        # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl
+        # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl
+        python -m pip --version || { \
+          curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
+          curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
+          python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
+
+    #-------- nose --------
+    - name: Install nose for Python ${{ matrix.python-version }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      shell: bash
+      run: |
+        echo "$PATH"
+        echo "$PYTHONHOME"
+        python --version
+        python -m pip --version
+        python -m pip nose --version || python -m pip install nose
+    - name: Install nose for other Python ${{ matrix.python-version }}
+      if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+      shell: bash
+      run: |
+        python -m pip nose --version || { \
+          curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
+          python --version; \
+          printf '%s\n' \
+            'import sys' \
+            'print(sys.path)' \
+            | python -; \
+          python -m pip --version; \
+          python -m pip install nose-1.3.7-py3-none-any.whl; }
     - name: Install nose (Jython)
       if: ${{ matrix.python-impl == 'jython' }}
-      # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+      # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
       run: |
-        wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
-        pip install nose-1.3.7-py2-none-any.whl
+        pip nose --version || { \
+          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
+          pip --version; \
+          pip install nose-1.3.7-py2-none-any.whl; }
+    - name: Set up nosetest test
+      if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
+      shell: bash
+      run: |
+        # define a test to validate the Python version used by nosetests
+        printf '%s\n' \
+          'from __future__ import unicode_literals' \
+          'import sys, os, platform, unittest' \
+          'class TestPython(unittest.TestCase):' \
+          '    def setUp(self):' \
+          '        self.ver = os.environ["PYTHON_VER"].split("-")' \
+          '    def test_python_ver(self):' \
+          '        self.assertEqual(sys.version[:3], self.ver[-1])' \
+          '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
+          '        self.assertIn(self.ver[0], sys.version.lower())' \
+          '    def test_python_impl(self):' \
+          '        self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
+          > test/test_python.py
+    #-------- TESTS -------
     - name: Run tests
+      if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       env:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
-      run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
+        PYTHON_VER: ${{ matrix.python-version }}
+        PYTHON_IMPL: ${{ matrix.python-impl }}
+
+      run: |
+        ./devscripts/run_tests.${{ matrix.run-tests-ext }}
+
   flake8:
     name: Linter
     runs-on: ubuntu-latest
@@ -81,3 +357,4 @@ jobs:
       run: pip install flake8
     - name: Run flake8
       run: flake8 .
+
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index edc19183d..4bddca047 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -6,6 +6,10 @@ import os
 from os.path import dirname as dirn
 import sys
 
+from youtube_dl.compat import compat_register_utf8
+
+compat_register_utf8()
+
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
diff --git a/test/test_execution.py b/test/test_execution.py
index 704e14612..1dee53a0f 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -10,10 +10,13 @@ import os
 import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from youtube_dl.compat import compat_register_utf8
+
 from youtube_dl.utils import encodeArgument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
+compat_register_utf8()
 
 try:
     _DEV_NULL = subprocess.DEVNULL
@@ -25,13 +28,14 @@ class TestExecution(unittest.TestCase):
     def test_import(self):
         subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
 
+    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_module_exec(self):
-        if sys.version_info >= (2, 7):  # Python 2.6 doesn't support package execution
-            subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
         subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
+    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
         p = subprocess.Popen(
             [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index 6c1b7ec91..c7c2252f5 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -15,6 +15,7 @@ IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
     'conf.py',
     'buildserver.py',
+    'get-pip.py',
 ]
 
 IGNORED_DIRS = [
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index e1bd67919..cc8285eba 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
 
 __license__ = 'Public Domain'
 
-import codecs
 import io
 import os
 import random
@@ -17,6 +16,7 @@ from .options import (
 )
 from .compat import (
     compat_getpass,
+    compat_register_utf8,
     compat_shlex_split,
     workaround_optparse_bug9161,
 )
@@ -46,10 +46,8 @@ from .YoutubeDL import YoutubeDL
 
 
 def _real_main(argv=None):
-    # Compatibility fixes for Windows
-    if sys.platform == 'win32':
-        # https://github.com/ytdl-org/youtube-dl/issues/820
-        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+    # Compatibility fix for Windows
+    compat_register_utf8()
 
     workaround_optparse_bug9161()
 
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index fe62caf80..0f4d3756f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -31,13 +31,17 @@ try:
     compat_str, compat_basestring, compat_chr = (
         unicode, basestring, unichr
     )
-    from .casefold import casefold as compat_casefold
-
 except NameError:
     compat_str, compat_basestring, compat_chr = (
         str, str, chr
     )
+
+# casefold
+try:
+    compat_str.casefold
     compat_casefold = lambda s: s.casefold()
+except AttributeError:
+    from .casefold import casefold as compat_casefold
 
 try:
     import collections.abc as compat_collections_abc
@@ -3137,6 +3141,15 @@ else:
     compat_open = open
 
 
+# compat_register_utf8
+def compat_register_utf8():
+    if sys.platform == 'win32':
+        # https://github.com/ytdl-org/youtube-dl/issues/820
+        from codecs import register, lookup
+        register(
+            lambda name: lookup('utf-8') if name == 'cp65001' else None)
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3203,6 +3216,7 @@ __all__ = [
     'compat_print',
     'compat_re_Match',
     'compat_re_Pattern',
+    'compat_register_utf8',
     'compat_setenv',
     'compat_shlex_quote',
     'compat_shlex_split',

From b08a58090635777f1001d5cde2cd141a5565177c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 30 Jun 2023 03:52:39 +0100
Subject: [PATCH 239/312] [workflows/ci.yml] Fix test support for Py 2.6

---
 .github/workflows/ci.yml           | 115 ++++++++++++++++++-----------
 devscripts/make_lazy_extractors.py |   8 +-
 test/test_execution.py             |  16 ++--
 3 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8d8e654fb..ce878c1b1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,9 +6,8 @@ env:
   all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
   main-cpython-versions: 2.7, 3.2, 3.5, 3.9
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
-  cpython-versions: all
-  # test-set: both
-  test-set: core
+  cpython-versions: main
+  test-set: both
 
 on:
   push:
@@ -75,6 +74,10 @@ jobs:
       contents: read
       packages: write
     runs-on: ${{ matrix.os }}
+    env:
+      PIP: python -m pip
+      PIP_DISABLE_PIP_VERSION_CHECK: true
+      PIP_NO_PYTHON_VERSION_WARNING: true
     strategy:
       fail-fast: true
       matrix:
@@ -152,12 +155,14 @@ jobs:
     - name: Set up Python 2.7
       if: ${{ matrix.python-version == '2.7' }}
       # install 2.7
+      shell: bash
       run: |
         sudo apt-get install -y python2 python-is-python2
         echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
     #-------- Python 2.6 --
     - name: Set up Python 2.6 environment
       if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
       run: |
         openssl_name=openssl-1.0.2u
         echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
@@ -178,6 +183,7 @@ jobs:
     - name: Build and set up Python 2.6
       if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
       # dl and build locally
+      shell: bash
       run: |
         # Install build environment
         sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
@@ -203,8 +209,7 @@ jobs:
         # Download PyEnv from its GitHub repository.
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
-        git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT
-        eval "$(pyenv init --path)"
+        git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
 
         # Prevent pyenv build trying (and failing) to update pip
         export GET_PIP=get-pip-2.6.py
@@ -216,17 +221,14 @@ jobs:
         export LDFLAGS="-L$openssl_lib"
         export LD_LIBRARY_PATH="$openssl_lib"
         pyenv install 2.6.9
-        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
-        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
-    - name: Set up cached Python 2.6
-      if: ${{ steps.cache26.outputs.cache-hit }}
+    - name: Locate Python 2.6
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
       run: |
-        export PYENV_ROOT
-        export PATH=$PYENV_ROOT/bin:$PATH
-        eval "$(pyenv init --path)"
-        pyenv local 2.6.9
-        echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV"
-        echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV"
+        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
+        echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+        echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
+        echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
     #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
@@ -236,8 +238,10 @@ jobs:
         distribution: 'zulu'
     - name: Setup Jython environment
       if: ${{ matrix.python-impl == 'jython' }}
+      shell: bash
       run: |
         echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
+        echo "PIP=pip" >> "$GITHUB_ENV"
     - name: Cache Jython
       id: cachejy
       if: ${{ matrix.python-impl == 'jython' }}
@@ -249,19 +253,21 @@ jobs:
           ${{ env.JYTHON_ROOT }}
     - name: Install Jython
       if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
         java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
-        echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+        echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH"
     - name: Set up cached Jython
       if: ${{ steps.cachejy.outputs.cache-hit }}
+      shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
     #-------- pip ---------
     - name: Set up supported Python ${{ matrix.python-version }} pip
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
       # This step may run in either Linux or Windows
       shell: bash
       run: |
@@ -272,48 +278,66 @@ jobs:
           get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
           curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
           python get-pip.py; }
+    - name: Set up Python 2.6 pip
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        python -m pip --version || { \
+          curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \
+          curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \
+          python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; }
+        # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751
+        echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV"
     - name: Set up other Python ${{ matrix.python-version }} pip
       if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
       shell: bash
       run: |
-        # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl
-        # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl
         python -m pip --version || { \
           curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
           curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
-          python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
-
+          python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
+    #-------- unittest ----
+    - name: Upgrade Unittest for Python 2.6
+      if: ${{ matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        # see pip for Jython
+        $PIP -qq show unittest2 || { \
+          for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
+              "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
+              "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \
+              "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \
+              "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \
+            curl -L -O "https://files.pythonhosted.org/packages/${u}"; \
+            $PIP install ${u##*/}; \
+          done; }
+        # make tests use unittest2
+        for test in ./test/test_*.py; do
+          sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
+        done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
       shell: bash
       run: |
         echo "$PATH"
         echo "$PYTHONHOME"
-        python --version
-        python -m pip --version
-        python -m pip nose --version || python -m pip install nose
-    - name: Install nose for other Python ${{ matrix.python-version }}
+        $PIP -qq show nose || $PIP install nose
+    - name: Install nose for other Python 2
+      if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
+      shell: bash
+      run: |
+        # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+        $PIP -qq show nose || { \
+          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
+          $PIP install nose-1.3.7-py2-none-any.whl; }
+    - name: Install nose for other Python 3
       if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
       shell: bash
       run: |
-        python -m pip nose --version || { \
+        $PIP -qq show nose || { \
           curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
-          python --version; \
-          printf '%s\n' \
-            'import sys' \
-            'print(sys.path)' \
-            | python -; \
-          python -m pip --version; \
-          python -m pip install nose-1.3.7-py3-none-any.whl; }
-    - name: Install nose (Jython)
-      if: ${{ matrix.python-impl == 'jython' }}
-      # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
-      run: |
-        pip nose --version || { \
-          curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
-          pip --version; \
-          pip install nose-1.3.7-py2-none-any.whl; }
+          $PIP install nose-1.3.7-py3-none-any.whl; }
     - name: Set up nosetest test
       if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       shell: bash
@@ -321,7 +345,11 @@ jobs:
         # define a test to validate the Python version used by nosetests
         printf '%s\n' \
           'from __future__ import unicode_literals' \
-          'import sys, os, platform, unittest' \
+          'import sys, os, platform' \
+          'try:' \
+          '    import unittest2 as unittest' \
+          'except ImportError:' \
+          '    import unittest' \
           'class TestPython(unittest.TestCase):' \
           '    def setUp(self):' \
           '        self.ver = os.environ["PYTHON_VER"].split("-")' \
@@ -340,7 +368,6 @@ jobs:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
         PYTHON_VER: ${{ matrix.python-version }}
         PYTHON_IMPL: ${{ matrix.python-impl }}
-
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
 
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 4bddca047..a8b6ff1b9 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -6,10 +6,6 @@ import os
 from os.path import dirname as dirn
 import sys
 
-from youtube_dl.compat import compat_register_utf8
-
-compat_register_utf8()
-
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
@@ -23,6 +19,10 @@ try:
 except OSError:
     pass
 
+from youtube_dl.compat import compat_register_utf8
+
+compat_register_utf8()
+
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 
diff --git a/test/test_execution.py b/test/test_execution.py
index 1dee53a0f..35e7a5651 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -11,13 +11,12 @@ import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl.compat import compat_register_utf8
-
 from youtube_dl.utils import encodeArgument
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-
 compat_register_utf8()
 
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
 try:
     _DEV_NULL = subprocess.DEVNULL
 except AttributeError:
@@ -33,21 +32,22 @@ class TestExecution(unittest.TestCase):
         subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
-        subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
+        os.environ['PYTHONIOENCODING'] = 'utf-8'
         p = subprocess.Popen(
-            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)
 
     def test_lazy_extractors(self):
-        lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
+        lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py')
         try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
-            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
         finally:
             for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
                 try:

From f24bc9272e9b74efc4c4af87c862f5f78921d424 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 4 Jul 2023 16:06:21 +0100
Subject: [PATCH 240/312] [Misc] Fixes for 2.6 compatibility

---
 test/test_jsinterp.py   | 10 ++++++----
 test/test_utils.py      |  2 +-
 youtube_dl/YoutubeDL.py |  6 +++++-
 youtube_dl/compat.py    | 12 ++++++++++++
 youtube_dl/jsinterp.py  | 13 ++++++++++++-
 youtube_dl/utils.py     |  3 ++-
 6 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index ecd6ab3c9..91b12f544 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -492,10 +492,12 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
         ''')
-        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
-                     'groups', 'match', 'pattern', 'scanner',
-                     'search', 'split', 'sub', 'subn'))
-        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
+        attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
+                     'split', 'sub', 'subn'))
+        if sys.version_info >= (2, 7):
+            # documented for 2.6 but may not be found
+            attrs.update(('flags', 'groupindex', 'groups', 'pattern'))
+        self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
diff --git a/test/test_utils.py b/test/test_utils.py
index b85d397d0..5fab05f7c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1612,7 +1612,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',),
                               msg='exceptions in the query function should be caught')
 
         # Test alternative paths
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 98b878fc1..068029d3e 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,7 +25,11 @@ import tokenize
 import traceback
 import random
 
-from ssl import OPENSSL_VERSION
+try:
+    from ssl import OPENSSL_VERSION
+except ImportError:
+    # Must be Python 2.6, should be built against 1.0.2
+    OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
 from string import ascii_letters
 
 from .compat import (
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 0f4d3756f..2554fd1c3 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,10 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
+from __future__ import division
 
 import base64
 import binascii
 import collections
 import ctypes
+import datetime
 import email
 import getpass
 import io
@@ -3150,6 +3152,15 @@ def compat_register_utf8():
             lambda name: lookup('utf-8') if name == 'cp65001' else None)
 
 
+# compat_datetime_timedelta_total_seconds
+try:
+    compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
+except AttributeError:
+    # Py 2.6
+    def compat_datetime_timedelta_total_seconds(td):
+        return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+
 legacy = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
@@ -3187,6 +3198,7 @@ __all__ = [
     'compat_chr',
     'compat_collections_abc',
     'compat_collections_chain_map',
+    'compat_datetime_timedelta_total_seconds',
     'compat_http_cookiejar',
     'compat_http_cookiejar_Cookie',
     'compat_http_cookies',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 1ba9c3d67..882432b80 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -277,9 +277,20 @@ class JSInterpreter(object):
 
         def __getattr__(self, name):
             self.__instantiate()
+            # make Py 2.6 conform to its lying documentation
+            if name == 'flags':
+                self.flags = self.__flags
+            elif name == 'pattern':
+                self.pattern = self.__pattern_txt
+            elif name in ('groupindex', 'groups'):
+                # in case these get set after a match?
+                if hasattr(self.__self, name):
+                    setattr(self, name, getattr(self.__self, name))
+                else:
+                    return 0 if name == 'groupindex' else {}
             if hasattr(self, name):
                 return getattr(self, name)
-            return super(JSInterpreter.JS_RegExp, self).__getattr__(name)
+            raise AttributeError('{0} has no attribute named {1}'.format(self, name))
 
         @classmethod
         def regex_flags(cls, expr):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 584581b6a..83f67bd95 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -47,6 +47,7 @@ from .compat import (
     compat_collections_abc,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
+    compat_datetime_timedelta_total_seconds,
     compat_etree_fromstring,
     compat_expanduser,
     compat_html_entities,
@@ -3102,7 +3103,7 @@ def unified_timestamp(date_str, day_first=True):
             pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
+        return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)
 
 
 def determine_ext(url, default_ext='unknown_video'):

From b6dff4073d469cceadb099c00ccbf3bd6fc515a6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:41:32 +0100
Subject: [PATCH 241/312] [core] Revert version display from b8a86dc

---
 youtube_dl/YoutubeDL.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 068029d3e..4e7fd1063 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2378,10 +2378,12 @@ class YoutubeDL(object):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n'))
+        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
+        writeln_debug('youtube-dl version ', __version__)
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled\n')
-        writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))  # moved down for easier merge
+            writeln_debug('Lazy loading extractors enabled')
+        if ytdl_is_updateable():
+            writeln_debug('Single file build')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],

From f47fdb9564d3ca1c0fa70ed6031148ec908fdc7b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 6 Jul 2023 15:46:22 +0100
Subject: [PATCH 242/312] [utils] Add {expected_type} and Iterable support to
 traverse_obj()

---
 test/test_utils.py  | 153 ++++++++++++++++++++++++++------
 youtube_dl/utils.py | 211 +++++++++++++++++++++++++++++---------------
 2 files changed, 265 insertions(+), 99 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 5fab05f7c..1fc16ed05 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -79,10 +79,12 @@ from youtube_dl.utils import (
     rot47,
     shell_quote,
     smuggle_url,
+    str_or_none,
     str_to_int,
     strip_jsonp,
     strip_or_none,
     subtitles_filename,
+    T,
     timeconvert,
     traverse_obj,
     try_call,
@@ -1566,6 +1568,7 @@ Line 1
         self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
 
     def test_traverse_obj(self):
+        str = compat_str
         _TEST_DATA = {
             100: 100,
             1.2: 1.2,
@@ -1598,8 +1601,8 @@ Line 1
 
         # Test Ellipsis behavior
         self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
-                              (item for item in _TEST_DATA.values() if item is not None),
-                              msg='`...` should give all values except `None`')
+                              (item for item in _TEST_DATA.values() if item not in (None, {})),
+                              msg='`...` should give all non discarded values')
         self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
                               msg='`...` selection for dicts should select all values')
         self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
@@ -1607,13 +1610,51 @@ Line 1
                          msg='nested `...` queries should work')
         self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
                               msg='`...` query result should be flattened')
+        self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
+                         msg='`...` should accept iterables')
 
         # Test function as key
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',),
-                              msg='exceptions in the query function should be caught')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+                              msg='exceptions in the query function should be catched')
+        self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
+                         msg='function key should accept iterables')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a: Ellipsis)
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
+
+        # Test set as key (transformation/type, like `expected_type`)
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
+                         msg='Function in set should be a transformation')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
+                         msg='Type in set should be a type filter')
+        self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
+                         msg='A single set should be wrapped into a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
+                         msg='Transformation function should not raise')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
+                         [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
+                         msg='Function in set should be a transformation')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, set())
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, {str.upper, str})
+
+        # Test `slice` as a key
+        _SLICE_DATA = [0, 1, 2, 3, 4]
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+                         msg='slice on a dictionary should not throw')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+                         msg='slice key should apply slice to sequence')
 
         # Test alternative paths
         self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
@@ -1659,15 +1700,23 @@ Line 1
                          {0: ['https://www.example.com/1', 'https://www.example.com/0']},
                          msg='triple nesting in dict path should be treated as branches')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
-                         msg='remove `None` values when dict key')
+                         msg='remove `None` values when top level dict key fails')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
-                         msg='do not remove `None` values if `default`')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
-                         msg='do not remove empty values when dict key')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
-                         msg='do not remove empty values when dict key and a default')
-        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
-                         msg='if branch in dict key not successful, return `[]`')
+                         msg='use `default` if key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+                         msg='remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
+                         msg='use `default` when dict key and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+                         msg='remove empty values when nested dict key fails')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+                         msg='default to dict if pruned')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='default to dict if pruned and default is given')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
+                         msg='use nested `default` when nested dict key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
+                         msg='remove key if branch in dict key not successful')
 
         # Testing default parameter behavior
         _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
@@ -1691,20 +1740,55 @@ Line 1
                          msg='if branched but not successful return `[]`, not `default`')
         self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
                          msg='if branched but object is empty return `[]`, not `default`')
+        self.assertEqual(traverse_obj(None, Ellipsis), [],
+                         msg='if branched but object is `None` return `[]`, not `default`')
+        self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
+                         msg='if branched but state is `None` return `[]`, not `default`')
+
+        branching_paths = [
+            ('fail', Ellipsis),
+            (Ellipsis, 'fail'),
+            100 * ('fail',) + (Ellipsis,),
+            (Ellipsis,) + 100 * ('fail',),
+        ]
+        for branching_path in branching_paths:
+            self.assertEqual(traverse_obj({}, branching_path), [],
+                             msg='if branched but state is `None`, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+                             msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+                             msg='if branching in last alternative and previous did match, return single value')
+            self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+                             msg='if branching in first alternative and non-branching path does match, return single value')
+            self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
+                             msg='if branching in first alternative and non-branching path does not match, return `default`')
 
         # Testing expected_type behavior
         _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
-                         msg='accept matching `expected_type` type')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
-                         msg='reject non matching `expected_type` type')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
-                         msg='transform type using type function')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
-                                      expected_type=lambda _: 1 / 0), None,
-                         msg='wrap expected_type function in try_call')
-        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
-                         msg='eliminate items that expected_type fails on')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
+                         'str', msg='accept matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
+                         None, msg='reject non matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
+                         '0', msg='transform type using type function')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
+                         None, msg='wrap expected_type function in try_call')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
+                         ['str'], msg='eliminate items that expected_type fails on')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
+                         {0: 100}, msg='type as expected_type should filter dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
+                         {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
+                         1, msg='expected_type should not filter non final dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
+                         {0: {0: 100}}, msg='expected_type should transform deep dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
+                         [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
+        self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
+                         [4], msg='expected_type regression for type matching in tuple branching')
+        self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
+                         [], msg='expected_type regression for type matching in dict result')
 
         # Test get_all behavior
         _GET_ALL_DATA = {'key': [0, 1, 2]}
@@ -1749,14 +1833,23 @@ Line 1
                                       _traverse_string=True), '.',
                          msg='traverse into converted data if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
-                                      _traverse_string=True), list('str'),
-                         msg='`...` branching into string should result in list')
+                                      _traverse_string=True), 'str',
+                         msg='`...` should result in string (same value) if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
+                                      _traverse_string=True), 'sr',
+                         msg='`slice` should result in string if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
+                                      _traverse_string=True), 'str',
+                         msg='function should result in string if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
                                       _traverse_string=True), ['s', 'r'],
-                         msg='branching into string should result in list')
-        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
-                                      _traverse_string=True), list('str'),
-                         msg='function branching into string should result in list')
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
+        self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
+                         msg='branching should result in list if `traverse_string`')
 
         # Test is_user_input behavior
         _IS_USER_INPUT_DATA = {'range8': list(range(8))}
@@ -1793,6 +1886,8 @@ Line 1
                          msg='failing str key on a `re.Match` should return `default`')
         self.assertEqual(traverse_obj(mobj, 8), None,
                          msg='failing int key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
+                         msg='function on a `re.Match` should give group name as well')
 
     def test_get_first(self):
         self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 83f67bd95..dbdbe5f59 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -16,6 +16,7 @@ import email.header
 import errno
 import functools
 import gzip
+import inspect
 import io
 import itertools
 import json
@@ -3881,7 +3882,7 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
         return unrecognized
 
 
-class LazyList(compat_collections_abc.Sequence):
+class LazyList(compat_collections_abc.Iterable):
     """Lazy immutable list from an iterable
     Note that slices of a LazyList are lists and not LazyList"""
 
@@ -4223,10 +4224,16 @@ def multipart_encode(data, boundary=None):
     return out, content_type
 
 
-def variadic(x, allowed_types=(compat_str, bytes, dict)):
-    if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable):
+def is_iterable_like(x, allowed_types=compat_collections_abc.Iterable, blocked_types=NO_DEFAULT):
+    if blocked_types is NO_DEFAULT:
+        blocked_types = (compat_str, bytes, compat_collections_abc.Mapping)
+    return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
+
+
+def variadic(x, allowed_types=NO_DEFAULT):
+    if isinstance(allowed_types, compat_collections_abc.Iterable):
         allowed_types = tuple(allowed_types)
-    return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+    return x if is_iterable_like(x, blocked_types=allowed_types) else (x,)
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
@@ -5993,7 +6000,7 @@ def clean_podcast_url(url):
 
 def traverse_obj(obj, *paths, **kwargs):
     """
-    Safely traverse nested `dict`s and `Sequence`s
+    Safely traverse nested `dict`s and `Iterable`s
 
     >>> obj = [{}, {"key": "value"}]
     >>> traverse_obj(obj, (1, "key"))
@@ -6001,14 +6008,17 @@ def traverse_obj(obj, *paths, **kwargs):
 
     Each of the provided `paths` is tested and the first producing a valid result will be returned.
     The next path will also be tested if the path branched but no results could be found.
-    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
-    A value of None is treated as the absence of a value.
+    Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
+    Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
 
     The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
 
     The keys in the path can be one of:
         - `None`:           Return the current object.
-        - `str`/`int`:      Return `obj[key]`. For `re.Match, return `obj.group(key)`.
+        - `set`:            Requires the only item in the set to be a type or function,
+                            like `{type}`/`{func}`. If a `type`, returns only values
+                            of this type. If a function, returns `func(obj)`.
+        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
         - `slice`:          Branch out and return all values in `obj[key]`.
         - `Ellipsis`:       Branch out and return a list of all values.
         - `tuple`/`list`:   Branch out and return a list of all matching values.
@@ -6016,6 +6026,9 @@ def traverse_obj(obj, *paths, **kwargs):
         - `function`:       Branch out and return values filtered by the function.
                             Read as: `[value for key, value in obj if function(key, value)]`.
                             For `Sequence`s, `key` is the index of the value.
+                            For `Iterable`s, `key` is the enumeration count of the value.
+                            For `re.Match`es, `key` is the group number (0 = full match)
+                            as well as additionally any group names, if given.
         - `dict`            Transform the current object and return a matching dict.
                             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
 
@@ -6024,8 +6037,12 @@ def traverse_obj(obj, *paths, **kwargs):
     @params paths           Paths which to traverse by.
     Keyword arguments:
     @param default          Value to return if the paths do not match.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, depth first. Try to avoid if using nested `dict` keys.
     @param expected_type    If a `type`, only accept final values of this type.
                             If any other callable, try to call the function on each result.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, recursively. This does respect branching paths.
     @param get_all          If `False`, return the first matching result, otherwise all matching ones.
     @param casesense        If `False`, consider string dictionary keys as case insensitive.
 
@@ -6036,12 +6053,15 @@ def traverse_obj(obj, *paths, **kwargs):
     @param _traverse_string  Whether to traverse into objects as strings.
                             If `True`, any non-compatible object will first be
                             converted into a string and then traversed into.
+                            The return value of that path will be a string instead,
+                            not respecting any further branching.
 
 
     @returns                The result of the object traversal.
                             If successful, `get_all=True`, and the path branches at least once,
                             then a list of results is returned instead.
                             A list is always returned if the last path branches and no `default` is given.
+                            If a path ends on a `dict` that result will always be a `dict`.
     """
 
     # parameter defaults
@@ -6055,7 +6075,6 @@ def traverse_obj(obj, *paths, **kwargs):
     # instant compat
     str = compat_str
 
-    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
     casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
 
     if isinstance(expected_type, type):
@@ -6063,128 +6082,180 @@ def traverse_obj(obj, *paths, **kwargs):
     else:
         type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
 
+    def lookup_or_none(v, k, getter=None):
+        try:
+            return getter(v, k) if getter else v[k]
+        except IndexError:
+            return None
+
     def from_iterable(iterables):
         # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
         for it in iterables:
             for item in it:
                 yield item
 
-    def apply_key(key, obj):
-        if obj is None:
-            return
+    def apply_key(key, obj, is_last):
+        branching = False
+
+        if obj is None and _traverse_string:
+            if key is Ellipsis or callable(key) or isinstance(key, slice):
+                branching = True
+                result = ()
+            else:
+                result = None
 
         elif key is None:
-            yield obj
+            result = obj
+
+        elif isinstance(key, set):
+            assert len(key) == 1, 'Set should only be used to wrap a single item'
+            item = next(iter(key))
+            if isinstance(item, type):
+                result = obj if isinstance(obj, item) else None
+            else:
+                result = try_call(item, args=(obj,))
 
         elif isinstance(key, (list, tuple)):
-            for branch in key:
-                _, result = apply_path(obj, branch)
-                for item in result:
-                    yield item
+            branching = True
+            result = from_iterable(
+                apply_path(obj, branch, is_last)[0] for branch in key)
 
         elif key is Ellipsis:
-            result = []
+            branching = True
             if isinstance(obj, compat_collections_abc.Mapping):
                 result = obj.values()
-            elif is_sequence(obj):
+            elif is_iterable_like(obj):
                 result = obj
             elif isinstance(obj, compat_re_Match):
                 result = obj.groups()
             elif _traverse_string:
+                branching = False
                 result = str(obj)
-            for item in result:
-                yield item
+            else:
+                result = ()
 
         elif callable(key):
-            if is_sequence(obj):
-                iter_obj = enumerate(obj)
-            elif isinstance(obj, compat_collections_abc.Mapping):
+            branching = True
+            if isinstance(obj, compat_collections_abc.Mapping):
                 iter_obj = obj.items()
+            elif is_iterable_like(obj):
+                iter_obj = enumerate(obj)
             elif isinstance(obj, compat_re_Match):
-                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
+                iter_obj = itertools.chain(
+                    enumerate(itertools.chain((obj.group(),), obj.groups())),
+                    obj.groupdict().items())
             elif _traverse_string:
+                branching = False
                 iter_obj = enumerate(str(obj))
             else:
-                return
-            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
-                yield item
+                iter_obj = ()
+
+            result = (v for k, v in iter_obj if try_call(key, args=(k, v)))
+            if not branching:  # string traversal
+                result = ''.join(result)
 
         elif isinstance(key, dict):
-            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
-            yield dict((k, v if v is not None else default) for k, v in iter_obj
-                       if v is not None or default is not NO_DEFAULT)
+            iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items())
+            result = dict((k, v if v is not None else default) for k, v in iter_obj
+                          if v is not None or default is not NO_DEFAULT) or None
 
         elif isinstance(obj, compat_collections_abc.Mapping):
-            yield (obj.get(key) if casesense or (key in obj)
-                   else next((v for k, v in obj.items() if casefold(k) == key), None))
+            result = (try_call(obj.get, args=(key,))
+                      if casesense or try_call(obj.__contains__, args=(key,))
+                      else next((v for k, v in obj.items() if casefold(k) == key), None))
 
         elif isinstance(obj, compat_re_Match):
+            result = None
             if isinstance(key, int) or casesense:
-                try:
-                    yield obj.group(key)
-                    return
-                except IndexError:
-                    pass
-            if not isinstance(key, str):
-                return
+                result = lookup_or_none(obj, key, getter=compat_re_Match.group)
 
-            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+            elif isinstance(key, str):
+                result = next((v for k, v in obj.groupdict().items()
+                              if casefold(k) == key), None)
 
         else:
-            if _is_user_input:
-                key = (int_or_none(key) if ':' not in key
-                       else slice(*map(int_or_none, key.split(':'))))
+            result = None
+            if isinstance(key, (int, slice)):
+                if is_iterable_like(obj, compat_collections_abc.Sequence):
+                    branching = isinstance(key, slice)
+                    result = lookup_or_none(obj, key)
+                elif _traverse_string:
+                    result = lookup_or_none(str(obj), key)
 
-            if not isinstance(key, (int, slice)):
-                return
+        return branching, result if branching else (result,)
 
-            if not is_sequence(obj):
-                if not _traverse_string:
-                    return
-                obj = str(obj)
+    def lazy_last(iterable):
+        iterator = iter(iterable)
+        prev = next(iterator, NO_DEFAULT)
+        if prev is NO_DEFAULT:
+            return
 
-            try:
-                yield obj[key]
-            except IndexError:
-                pass
+        for item in iterator:
+            yield False, prev
+            prev = item
 
-    def apply_path(start_obj, path):
+        yield True, prev
+
+    def apply_path(start_obj, path, test_type):
         objs = (start_obj,)
         has_branched = False
 
-        for key in variadic(path):
-            if _is_user_input and key == ':':
-                key = Ellipsis
+        key = None
+        for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
+            if _is_user_input and isinstance(key, str):
+                if key == ':':
+                    key = Ellipsis
+                elif ':' in key:
+                    key = slice(*map(int_or_none, key.split(':')))
+                elif int_or_none(key) is not None:
+                    key = int(key)
 
             if not casesense and isinstance(key, str):
                 key = compat_casefold(key)
 
-            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
-                has_branched = True
+            if __debug__ and callable(key):
+                # Verify function signature
+                inspect.getcallargs(key, None, None)
 
-            key_func = functools.partial(apply_key, key)
-            objs = from_iterable(map(key_func, objs))
+            new_objs = []
+            for obj in objs:
+                branching, results = apply_key(key, obj, last)
+                has_branched |= branching
+                new_objs.append(results)
 
-        return has_branched, objs
+            objs = from_iterable(new_objs)
 
-    def _traverse_obj(obj, path, use_list=True):
-        has_branched, results = apply_path(obj, path)
-        results = LazyList(x for x in map(type_test, results) if x is not None)
+        if test_type and not isinstance(key, (dict, list, tuple)):
+            objs = map(type_test, objs)
+
+        return objs, has_branched, isinstance(key, dict)
+
+    def _traverse_obj(obj, path, allow_empty, test_type):
+        results, has_branched, is_dict = apply_path(obj, path, test_type)
+        results = LazyList(x for x in results if x not in (None, {}))
 
         if get_all and has_branched:
-            return results.exhaust() if results or use_list else None
+            if results:
+                return results.exhaust()
+            if allow_empty:
+                return [] if default is NO_DEFAULT else default
+            return None
 
-        return results[0] if results else None
+        return results[0] if results else {} if allow_empty and is_dict else None
 
     for index, path in enumerate(paths, 1):
-        use_list = default is NO_DEFAULT and index == len(paths)
-        result = _traverse_obj(obj, path, use_list)
+        result = _traverse_obj(obj, path, index == len(paths), True)
         if result is not None:
             return result
 
     return None if default is NO_DEFAULT else default
 
 
+def T(x):
+    """ For use in yt-dl instead of {type} or set((type,)) """
+    return set((x,))
+
+
 def get_first(obj, keys, **kwargs):
     return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
 

From d5ef405c5d533c85cebd205a5b7958614c7013f3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:45:31 +0100
Subject: [PATCH 243/312] [core] Align error reporting methods with yt-dlp

---
 test/helper.py          |  3 ++-
 test/test_YoutubeDL.py  | 10 ++--------
 youtube_dl/YoutubeDL.py | 39 ++++++++++++++++++++++++++++++++-------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/test/helper.py b/test/helper.py
index 883b2e877..e3314b03e 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -72,7 +72,8 @@ class FakeYDL(YoutubeDL):
     def to_screen(self, s, skip_eol=None):
         print(s)
 
-    def trouble(self, s, tb=None):
+    def trouble(self, *args, **kwargs):
+        s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message')
         raise Exception(s)
 
     def download(self, x):
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index f8c8e619c..60780b8a7 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -930,17 +930,11 @@ class TestYoutubeDL(unittest.TestCase):
     # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
     def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
 
-        class _YDL(YDL):
-            def __init__(self, *args, **kwargs):
-                super(_YDL, self).__init__(*args, **kwargs)
-
-            def trouble(self, s, tb=None):
-                pass
-
-        ydl = _YDL({
+        ydl = YDL({
             'format': 'extra',
             'ignoreerrors': True,
         })
+        ydl.trouble = lambda *_, **__: None
 
         class VideoIE(InfoExtractor):
             _VALID_URL = r'video:(?P<id>\d+)'
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4e7fd1063..1435754c2 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -582,7 +582,7 @@ class YoutubeDL(object):
         if self.params.get('cookiefile') is not None:
             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 
-    def trouble(self, message=None, tb=None):
+    def trouble(self, *args, **kwargs):
         """Determine action to take when a download problem appears.
 
         Depending on if the downloader has been configured to ignore
@@ -591,6 +591,11 @@ class YoutubeDL(object):
 
         tb, if given, is additional traceback information.
         """
+        # message=None, tb=None, is_error=True
+        message = args[0] if len(args) > 0 else kwargs.get('message', None)
+        tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
+        is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
+
         if message is not None:
             self.to_stderr(message)
         if self.params.get('verbose'):
@@ -603,7 +608,10 @@ class YoutubeDL(object):
                 else:
                     tb_data = traceback.format_list(traceback.extract_stack())
                     tb = ''.join(tb_data)
-            self.to_stderr(tb)
+            if tb:
+                self.to_stderr(tb)
+        if not is_error:
+            return
         if not self.params.get('ignoreerrors', False):
             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                 exc_info = sys.exc_info()[1].exc_info
@@ -612,11 +620,18 @@ class YoutubeDL(object):
             raise DownloadError(message, exc_info)
         self._download_retcode = 1
 
-    def report_warning(self, message):
+    def report_warning(self, message, only_once=False, _cache={}):
         '''
         Print the message to stderr, it will be prefixed with 'WARNING:'
         If stderr is a tty file the 'WARNING:' will be colored
         '''
+        if only_once:
+            m_hash = hash((self, message))
+            m_cnt = _cache.setdefault(m_hash, 0)
+            _cache[m_hash] = m_cnt + 1
+            if m_cnt > 0:
+                return
+
         if self.params.get('logger') is not None:
             self.params['logger'].warning(message)
         else:
@@ -629,7 +644,7 @@ class YoutubeDL(object):
             warning_message = '%s %s' % (_msg_header, message)
             self.to_stderr(warning_message)
 
-    def report_error(self, message, tb=None):
+    def report_error(self, message, *args, **kwargs):
         '''
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
@@ -638,8 +653,18 @@ class YoutubeDL(object):
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
-        error_message = '%s %s' % (_msg_header, message)
-        self.trouble(error_message, tb)
+        kwargs['message'] = '%s %s' % (_msg_header, message)
+        self.trouble(*args, **kwargs)
+
+    def report_unscoped_cookies(self, *args, **kwargs):
+        # message=None, tb=False, is_error=False
+        if len(args) <= 2:
+            kwargs.setdefault('is_error', False)
+            if len(args) <= 0:
+                kwargs.setdefault(
+                    'message',
+                    'Unscoped cookies are not allowed: please specify some sort of scoping')
+        self.report_error(*args, **kwargs)
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
@@ -835,7 +860,7 @@ class YoutubeDL(object):
                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                 self.report_error(msg)
             except ExtractorError as e:  # An error we somewhat expected
-                self.report_error(compat_str(e), e.format_traceback())
+                self.report_error(compat_str(e), tb=e.format_traceback())
             except MaxDownloadsReached:
                 raise
             except Exception as e:

From 1720c04dc56fa0d2caa0a455b1acbd569347482e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jul 2023 20:47:58 +0100
Subject: [PATCH 244/312] [test] Make skipped tests in test_execution work with
 Py 2.6

---
 test/test_execution.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/test/test_execution.py b/test/test_execution.py
index 35e7a5651..ae59e562a 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -24,21 +24,24 @@ except AttributeError:
 
 
 class TestExecution(unittest.TestCase):
+    def setUp(self):
+        self.module = 'youtube_dl'
+        if sys.version_info < (2, 7):
+            self.module += '.__main__'
+
     def test_import(self):
         subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
 
-    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_module_exec(self):
-        subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
     def test_main_exec(self):
         subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
-    @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution')
     def test_cmdline_umlauts(self):
         os.environ['PYTHONIOENCODING'] = 'utf-8'
         p = subprocess.Popen(
-            [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'],
+            [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)

From 648dc5304cb2476592ff142988b8c62675011fcc Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 7 Jul 2023 18:51:38 +0100
Subject: [PATCH 245/312] [compat] Add Request and HTTPClient compat for
 redirect

* support `method` parameter of `Request.__init__`  (Py 2 and old Py 3)
* support `getcode` method of compat_http_client.HTTPResponse (Py 2)
---
 youtube_dl/compat.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 2554fd1c3..cd11ba5aa 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -21,6 +21,7 @@ import socket
 import struct
 import subprocess
 import sys
+import types
 import xml.etree.ElementTree
 
 # naming convention
@@ -55,6 +56,22 @@ try:
 except ImportError:  # Python 2
     import urllib2 as compat_urllib_request
 
+# Also fix up lack of method arg in old Pythons
+try:
+    _req = compat_urllib_request.Request
+    _req('http://127.0.0.1', method='GET')
+except TypeError:
+    class _request(object):
+        def __new__(cls, url, *args, **kwargs):
+            method = kwargs.pop('method', None)
+            r = _req(url, *args, **kwargs)
+            if method:
+                r.get_method = types.MethodType(lambda _: method, r)
+            return r
+
+    compat_urllib_request.Request = _request
+
+
 try:
     import urllib.error as compat_urllib_error
 except ImportError:  # Python 2
@@ -79,6 +96,12 @@ try:
 except ImportError:  # Python 2
     import urllib as compat_urllib_response
 
+try:
+    compat_urllib_response.addinfourl.status
+except AttributeError:
+    # .getcode() is deprecated in Py 3.
+    compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
+
 try:
     import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
@@ -2360,6 +2383,11 @@ try:
     import http.client as compat_http_client
 except ImportError:  # Python 2
     import httplib as compat_http_client
+try:
+    compat_http_client.HTTPResponse.getcode
+except AttributeError:
+    # Py < 3.1
+    compat_http_client.HTTPResponse.getcode = lambda self: self.status
 
 try:
     from urllib.error import HTTPError as compat_HTTPError

From 46fde7caeeab13a6277aab22a0e8a29e10c30cc3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 7 Jun 2023 14:51:50 +0100
Subject: [PATCH 246/312] [core] Update redirect handling from yt-dlp

* Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094
* add test that redirected `POST` loses its `Content-Type`
---
 test/test_http.py   | 489 +++++++++++++++++++++++++++++++++++++++-----
 youtube_dl/utils.py |  74 ++++---
 2 files changed, 484 insertions(+), 79 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 487a9bc77..1a65df9e0 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,33 +8,160 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import gzip
+import io
+import ssl
+import tempfile
+import threading
+import zlib
+
+# avoid deprecated alias assertRaisesRegexp
+if hasattr(unittest.TestCase, 'assertRaisesRegex'):
+    unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
+
+try:
+    import brotli
+except ImportError:
+    brotli = None
+try:
+    from urllib.request import pathname2url
+except ImportError:
+    from urllib import pathname2url
+
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_server,
+    compat_str as str,
+    compat_urllib_error,
+    compat_urllib_HTTPError,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+from youtube_dl.utils import (
+    sanitized_Request,
+    urlencode_postdata,
+)
+
 from test.helper import (
+    FakeYDL,
     FakeLogger,
     http_server_port,
 )
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server, compat_urllib_request
-import ssl
-import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+    protocol_version = 'HTTP/1.1'
+
+    # work-around old/new -style class inheritance
+    def super(self, meth_name, *args, **kwargs):
+        from types import MethodType
+        try:
+            super()
+            fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
+        except TypeError:
+            fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
+        self.super = MethodType(fn, self)
+        return self.super(meth_name, *args, **kwargs)
+
     def log_message(self, format, *args):
         pass
 
+    def _headers(self):
+        payload = str(self.headers).encode('utf-8')
+        self.send_response(200)
+        self.send_header('Content-Type', 'application/json')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _redirect(self):
+        self.send_response(int(self.path[len('/redirect_'):]))
+        self.send_header('Location', '/method')
+        self.send_header('Content-Length', '0')
+        self.end_headers()
+
+    def _method(self, method, payload=None):
+        self.send_response(200)
+        self.send_header('Content-Length', str(len(payload or '')))
+        self.send_header('Method', method)
+        self.end_headers()
+        if payload:
+            self.wfile.write(payload)
+
+    def _status(self, status):
+        payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')
+        self.send_response(int(status))
+        self.send_header('Content-Type', 'text/html; charset=utf-8')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _read_data(self):
+        if 'Content-Length' in self.headers:
+            return self.rfile.read(int(self.headers['Content-Length']))
+
+    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+        return '{0}://{1}:{2}/{3}'.format(
+            scheme, host,
+            port if port is not None
+            else http_server_port(self.server), path)
+
+    def do_POST(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('POST', data)
+        elif self.path.startswith('/headers'):
+            self._headers()
+        else:
+            self._status(404)
+
+    def do_HEAD(self):
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('HEAD')
+        else:
+            self._status(404)
+
+    def do_PUT(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('PUT', data)
+        else:
+            self._status(404)
+
     def do_GET(self):
+
+        def respond(payload=b'<html><video src="/vid.mp4" /></html>',
+                    payload_type='text/html; charset=utf-8',
+                    payload_encoding=None,
+                    resp_code=200):
+            self.send_response(resp_code)
+            self.send_header('Content-Type', payload_type)
+            if payload_encoding:
+                self.send_header('Content-Encoding', payload_encoding)
+            self.send_header('Content-Length', str(len(payload)))  # required for persistent connections
+            self.end_headers()
+            self.wfile.write(payload)
+
+        def gzip_compress(p):
+            buf = io.BytesIO()
+            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+                f.write(p)
+            return buf.getvalue()
+
         if self.path == '/video.html':
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/html; charset=utf-8')
-            self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+            respond()
         elif self.path == '/vid.mp4':
-            self.send_response(200)
-            self.send_header('Content-Type', 'video/mp4')
-            self.end_headers()
-            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+            respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
         elif self.path == '/302':
             if sys.version_info[0] == 3:
                 # XXX: Python 3 http server does not allow non-ASCII header values
@@ -42,60 +169,284 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
                 self.end_headers()
                 return
 
-            new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
+            new_url = self._test_url('中文.html')
             self.send_response(302)
             self.send_header(b'Location', new_url.encode('utf-8'))
             self.end_headers()
         elif self.path == '/%E4%B8%AD%E6%96%87.html':
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            respond()
+        elif self.path == '/%c7%9f':
+            respond()
+        elif self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('GET')
+        elif self.path.startswith('/headers'):
+            self._headers()
+        elif self.path == '/trailing_garbage':
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            compressed = gzip_compress(payload) + b'trailing garbage'
+            respond(compressed, payload_encoding='gzip')
+        elif self.path == '/302-non-ascii-redirect':
+            new_url = self._test_url('中文.html')
+            # actually respond with permanent redirect
+            self.send_response(301)
+            self.send_header('Location', new_url)
+            self.send_header('Content-Length', '0')
             self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+        elif self.path == '/content-encoding':
+            encodings = self.headers.get('ytdl-encoding', '')
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+                if encoding == 'br' and brotli:
+                    payload = brotli.compress(payload)
+                elif encoding == 'gzip':
+                    payload = gzip_compress(payload)
+                elif encoding == 'deflate':
+                    payload = zlib.compress(payload)
+                elif encoding == 'unsupported':
+                    payload = b'raw'
+                    break
+                else:
+                    self._status(415)
+                    return
+            respond(payload, payload_encoding=encodings)
+
         else:
-            assert False
+            self._status(404)
+
+    def send_header(self, keyword, value):
+        """
+        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
+        This is against what is defined in RFC 3986: but we need to test that we support this
+        since some sites incorrectly do this.
+        """
+        if keyword.lower() == 'connection':
+            return self.super('send_header', keyword, value)
+
+        if not hasattr(self, '_headers_buffer'):
+            self._headers_buffer = []
+
+        self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
+
+    def end_headers(self):
+        if hasattr(self, '_headers_buffer'):
+            self.wfile.write(b''.join(self._headers_buffer))
+            self._headers_buffer = []
+        self.super('end_headers')
 
 
 class TestHTTP(unittest.TestCase):
     def setUp(self):
-        self.httpd = compat_http_server.HTTPServer(
+        # HTTP server
+        self.http_httpd = compat_http_server.HTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        self.http_port = http_server_port(self.http_httpd)
 
-    def test_unicode_path_redirection(self):
-        # XXX: Python 3 http server does not allow non-ASCII header values
-        if sys.version_info[0] == 3:
-            return
+        self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
+        self.http_server_thread.daemon = True
+        self.http_server_thread.start()
 
-        ydl = YoutubeDL({'logger': FakeLogger()})
-        r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
+        try:
+            from http.server import ThreadingHTTPServer
+        except ImportError:
+            try:
+                from socketserver import ThreadingMixIn
+            except ImportError:
+                from SocketServer import ThreadingMixIn
 
+            class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
+                pass
 
-class TestHTTPS(unittest.TestCase):
-    def setUp(self):
+        # HTTPS server
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
-        self.httpd = compat_http_server.HTTPServer(
+        self.https_httpd = ThreadingHTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
-        self.httpd.socket = ssl.wrap_socket(
-            self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        try:
+            sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+            sslctx.verify_mode = ssl.CERT_NONE
+            sslctx.check_hostname = False
+            sslctx.load_cert_chain(certfn, None)
+            self.https_httpd.socket = sslctx.wrap_socket(
+                self.https_httpd.socket, server_side=True)
+        except AttributeError:
+            self.https_httpd.socket = ssl.wrap_socket(
+                self.https_httpd.socket, certfile=certfn, server_side=True)
+
+        self.https_port = http_server_port(self.https_httpd)
+        self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
+        self.https_server_thread.daemon = True
+        self.https_server_thread.start()
+
+    def tearDown(self):
+
+        def closer(svr):
+            def _closer():
+                svr.shutdown()
+                svr.server_close()
+            return _closer
+
+        shutdown_thread = threading.Thread(target=closer(self.http_httpd))
+        shutdown_thread.start()
+        self.http_server_thread.join(2.0)
+
+        shutdown_thread = threading.Thread(target=closer(self.https_httpd))
+        shutdown_thread.start()
+        self.https_server_thread.join(2.0)
+
+    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+        return '{0}://{1}:{2}/{3}'.format(
+            scheme, host,
+            port if port is not None
+            else self.https_port if scheme == 'https'
+            else self.http_port, path)
 
     def test_nocheckcertificate(self):
-        if sys.version_info >= (2, 7, 9):  # No certificate checking anyways
-            ydl = YoutubeDL({'logger': FakeLogger()})
-            self.assertRaises(
-                Exception,
-                ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+        with FakeYDL({'logger': FakeLogger()}) as ydl:
+            with self.assertRaises(compat_urllib_error.URLError):
+                ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
 
-        ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
-        r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
-        self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
+            r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
+            self.assertEqual(r.getcode(), 200)
+            r.close()
+
+    def test_percent_encode(self):
+        with FakeYDL() as ydl:
+            # Unicode characters should be encoded with uppercase percent-encoding
+            res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
+            self.assertEqual(res.getcode(), 200)
+            res.close()
+            # don't normalize existing percent encodings
+            res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
+            self.assertEqual(res.getcode(), 200)
+            res.close()
+
+    def test_unicode_path_redirection(self):
+        with FakeYDL() as ydl:
+            r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
+            self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
+            r.close()
+
+    def test_redirect(self):
+        with FakeYDL() as ydl:
+            def do_req(redirect_status, method, check_no_content=False):
+                data = b'testdata' if method in ('POST', 'PUT') else None
+                res = ydl.urlopen(sanitized_Request(
+                    self._test_url('redirect_{0}'.format(redirect_status)),
+                    method=method, data=data))
+                if check_no_content:
+                    self.assertNotIn('Content-Type', res.headers)
+                return res.read().decode('utf-8'), res.headers.get('method', '')
+            # A 303 must either use GET or HEAD for subsequent request
+            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
+            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
+
+            # 301 and 302 turn POST only into a GET, with no Content-Type
+            self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
+            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
+            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
+            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
+
+            # 307 and 308 should not change method
+            for m in ('POST', 'PUT'):
+                self.assertEqual(do_req(307, m), ('testdata', m))
+                self.assertEqual(do_req(308, m), ('testdata', m))
+
+            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
+
+            # These should not redirect and instead raise an HTTPError
+            for code in (300, 304, 305, 306):
+                with self.assertRaises(compat_urllib_HTTPError):
+                    do_req(code, 'GET')
+
+    def test_content_type(self):
+        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
+        with FakeYDL({'nocheckcertificate': True}) as ydl:
+            # method should be auto-detected as POST
+            r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
+
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+            # test http
+            r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+    def test_cookiejar(self):
+        with FakeYDL() as ydl:
+            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+                0, 'test', 'ytdl', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
+            self.assertIn(b'Cookie: test=ytdl', data)
+
+    def test_no_compression_compat_header(self):
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('headers'),
+                    headers={'Youtubedl-no-compression': True})).read()
+            self.assertIn(b'Accept-Encoding: identity', data)
+            self.assertNotIn(b'youtubedl-no-compression', data.lower())
+
+    def test_gzip_trailing_garbage(self):
+        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
+        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
+            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
+
+    def __test_compression(self, encoding):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('content-encoding'),
+                    headers={'ytdl-encoding': encoding}))
+            self.assertEqual(res.headers.get('Content-Encoding'), encoding)
+            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    @unittest.skipUnless(brotli, 'brotli support is not installed')
+    @unittest.expectedFailure
+    def test_brotli(self):
+        self.__test_compression('br')
+
+    @unittest.expectedFailure
+    def test_deflate(self):
+        self.__test_compression('deflate')
+
+    @unittest.expectedFailure
+    def test_gzip(self):
+        self.__test_compression('gzip')
+
+    @unittest.expectedFailure  # not yet implemented
+    def test_multiple_encodings(self):
+        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
+        with FakeYDL() as ydl:
+            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+                res = ydl.urlopen(
+                    sanitized_Request(
+                        self._test_url('content-encoding'),
+                        headers={'ytdl-encoding': pair}))
+                self.assertEqual(res.headers.get('Content-Encoding'), pair)
+                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_unsupported_encoding(self):
+        # it should return the raw content
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    self._test_url('content-encoding'),
+                    headers={'ytdl-encoding': 'unsupported'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
+            self.assertEqual(res.read(), b'raw')
 
 
 def _build_proxy_handler(name):
@@ -109,7 +460,7 @@ def _build_proxy_handler(name):
             self.send_response(200)
             self.send_header('Content-Type', 'text/plain; charset=utf-8')
             self.end_headers()
-            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+            self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
     return HTTPTestRequestHandler
 
 
@@ -129,10 +480,30 @@ class TestProxy(unittest.TestCase):
         self.geo_proxy_thread.daemon = True
         self.geo_proxy_thread.start()
 
+    def tearDown(self):
+
+        def closer(svr):
+            def _closer():
+                svr.shutdown()
+                svr.server_close()
+            return _closer
+
+        shutdown_thread = threading.Thread(target=closer(self.proxy))
+        shutdown_thread.start()
+        self.proxy_thread.join(2.0)
+
+        shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
+        shutdown_thread.start()
+        self.geo_proxy_thread.join(2.0)
+
+    def _test_proxy(self, host='127.0.0.1', port=None):
+        return '{0}:{1}'.format(
+            host, port if port is not None else self.port)
+
     def test_proxy(self):
-        geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+        geo_proxy = self._test_proxy(port=self.geo_port)
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': self._test_proxy(),
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
@@ -146,7 +517,7 @@ class TestProxy(unittest.TestCase):
 
     def test_proxy_with_idn(self):
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': self._test_proxy(),
         })
         url = 'http://中文.tw/'
         response = ydl.urlopen(url).read().decode('utf-8')
@@ -154,5 +525,25 @@ class TestProxy(unittest.TestCase):
         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
 
 
+class TestFileURL(unittest.TestCase):
+    # See https://github.com/ytdl-org/youtube-dl/issues/8227
+    def test_file_urls(self):
+        tf = tempfile.NamedTemporaryFile(delete=False)
+        tf.write(b'foobar')
+        tf.close()
+        url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
+        with FakeYDL() as ydl:
+            self.assertRaisesRegexp(
+                compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
+        # not yet implemented
+        """
+        with FakeYDL({'enable_file_urls': True}) as ydl:
+            res = ydl.urlopen(url)
+            self.assertEqual(res.read(), b'foobar')
+            res.close()
+        """
+        os.unlink(tf.name)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index dbdbe5f59..58c710b08 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -41,7 +41,6 @@ import zlib
 from .compat import (
     compat_HTMLParseError,
     compat_HTMLParser,
-    compat_HTTPError,
     compat_basestring,
     compat_casefold,
     compat_chr,
@@ -64,6 +63,7 @@ from .compat import (
     compat_struct_pack,
     compat_struct_unpack,
     compat_urllib_error,
+    compat_urllib_HTTPError,
     compat_urllib_parse,
     compat_urllib_parse_parse_qs as compat_parse_qs,
     compat_urllib_parse_urlencode,
@@ -2614,7 +2614,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     Part of this code was copied from:
 
-    http://techknack.net/python-urllib2-handlers/
+    http://techknack.net/python-urllib2-handlers/, archived at
+    https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/
 
     Andrew Rowls, the author of that code, agreed to release it to the
     public domain.
@@ -2672,7 +2673,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             req._Request__original = req._Request__original.partition('#')[0]
             req._Request__r_type = req._Request__r_type.partition('#')[0]
 
-        return req
+        # Use the totally undocumented AbstractHTTPHandler per
+        # https://github.com/yt-dlp/yt-dlp/pull/4158
+        return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req)
 
     def http_response(self, req, resp):
         old_resp = resp
@@ -2683,7 +2686,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             try:
                 uncompressed = io.BytesIO(gz.read())
             except IOError as original_ioerror:
-                # There may be junk add the end of the file
+                # There may be junk at the end of the file
                 # See http://stackoverflow.com/q/4928560/35070 for details
                 for i in range(1, 1024):
                     try:
@@ -2710,9 +2713,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             if location:
                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                 if sys.version_info >= (3, 0):
-                    location = location.encode('iso-8859-1').decode('utf-8')
-                else:
-                    location = location.decode('utf-8')
+                    location = location.encode('iso-8859-1')
+                location = location.decode('utf-8')
                 location_escaped = escape_url(location)
                 if location != location_escaped:
                     del resp.headers['Location']
@@ -2940,17 +2942,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
     The code is based on HTTPRedirectHandler implementation from CPython [1].
 
-    This redirect handler solves two issues:
-     - ensures redirect URL is always unicode under python 2
-     - introduces support for experimental HTTP response status code
-       308 Permanent Redirect [2] used by some sites [3]
+    This redirect handler fixes and improves the logic to better align with RFC7261
+    and what browsers tend to do [2][3]
 
     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
-    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
-    3. https://github.com/ytdl-org/youtube-dl/issues/28768
+    2. https://datatracker.ietf.org/doc/html/rfc7231
+    3. https://github.com/python/cpython/issues/91306
     """
 
-    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+    # Supply possibly missing alias
+    http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
 
     def redirect_request(self, req, fp, code, msg, headers, newurl):
         """Return a Request or None in response to a redirect.
@@ -2962,19 +2963,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         else should try to handle this url.  Return None if you can't
         but another Handler might.
         """
-        m = req.get_method()
-        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
-                 or code in (301, 302, 303) and m == "POST")):
-            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
-        # Strictly (according to RFC 2616), 301 or 302 in response to
-        # a POST MUST NOT cause a redirection without confirmation
-        # from the user (of urllib.request, in this case).  In practice,
-        # essentially all clients do redirect in this case, so we do
-        # the same.
+        if code not in (301, 302, 303, 307, 308):
+            raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp)
+
+        new_method = req.get_method()
+        new_data = req.data
+        remove_headers = []
 
         # On python 2 urlh.geturl() may sometimes return redirect URL
-        # as byte string instead of unicode. This workaround allows
-        # to force it always return unicode.
+        # as a byte string instead of unicode. This workaround forces
+        # it to return unicode.
         if sys.version_info[0] < 3:
             newurl = compat_str(newurl)
 
@@ -2983,13 +2981,29 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
 
-        CONTENT_HEADERS = ("content-length", "content-type")
+        # A 303 must either use GET or HEAD for subsequent request
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
+        if code == 303 and req.get_method() != 'HEAD':
+            new_method = 'GET'
+        # 301 and 302 redirects are commonly turned into a GET from a POST
+        # for subsequent requests by browsers, so we'll do the same.
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
+        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
+        elif code in (301, 302) and req.get_method() == 'POST':
+            new_method = 'GET'
+
+        # only remove payload if method changed (e.g. POST to GET)
+        if new_method != req.get_method():
+            new_data = None
+            remove_headers.extend(['Content-Length', 'Content-Type'])
+
         # NB: don't use dict comprehension for python 2.6 compatibility
-        newheaders = dict((k, v) for k, v in req.headers.items()
-                          if k.lower() not in CONTENT_HEADERS)
+        new_headers = dict((k, v) for k, v in req.header_items()
+                           if k.lower() not in remove_headers)
+
         return compat_urllib_request.Request(
-            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
-            unverifiable=True)
+            newurl, headers=new_headers, origin_req_host=req.origin_req_host,
+            unverifiable=True, method=new_method, data=new_data)
 
 
 def extract_timezone(date_str):

From b383be98874d4dded67ee8a679fae30340722709 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 7 Jun 2023 19:38:54 +0100
Subject: [PATCH 247/312] [core] Remove `Cookie` header on redirect to prevent
 leaks

Adated from yt-dlp/yt-dlp-ghsa-v8mc-9377-rwjj/pull/1/commits/101caac
Thx coletdjnz
---
 test/test_http.py   | 32 ++++++++++++++++++++++++++++++--
 youtube_dl/utils.py |  8 ++++++--
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 1a65df9e0..cd180b51f 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -183,6 +183,11 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             self._method('GET')
         elif self.path.startswith('/headers'):
             self._headers()
+        elif self.path.startswith('/308-to-headers'):
+            self.send_response(308)
+            self.send_header('Location', '/headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path == '/trailing_garbage':
             payload = b'<html><video src="/vid.mp4" /></html>'
             compressed = gzip_compress(payload) + b'trailing garbage'
@@ -385,8 +390,31 @@ class TestHTTP(unittest.TestCase):
             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
                 0, 'test', 'ytdl', None, False, '127.0.0.1', True,
                 False, '/headers', True, False, None, False, None, None, {}))
-            data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
-            self.assertIn(b'Cookie: test=ytdl', data)
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'))).read().decode('utf-8')
+            self.assertIn('Cookie: test=ytdl', data)
+
+    def test_passed_cookie_header(self):
+        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+        with FakeYDL() as ydl:
+            # Specified Cookie header should be used
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertIn('Cookie: test=test', res)
+
+            # Specified Cookie header should be removed on any redirect
+            res = ydl.urlopen(sanitized_Request(
+                self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=test', res)
+
+            # Specified Cookie header should override global cookiejar for that request
+            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(
+                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=ytdlp', data)
+            self.assertIn('Cookie: test=test', data)
 
     def test_no_compression_compat_header(self):
         with FakeYDL() as ydl:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 58c710b08..c21cd3687 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2968,7 +2968,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         new_method = req.get_method()
         new_data = req.data
-        remove_headers = []
 
         # On python 2 urlh.geturl() may sometimes return redirect URL
         # as a byte string instead of unicode. This workaround forces
@@ -2981,6 +2980,11 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
 
+        # Technically the Cookie header should be in unredirected_hdrs;
+        # however in practice some may set it in normal headers anyway.
+        # We will remove it here to prevent any leaks.
+        remove_headers = ['Cookie']
+
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
         if code == 303 and req.get_method() != 'HEAD':
@@ -2999,7 +3003,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
         # NB: don't use dict comprehension for python 2.6 compatibility
         new_headers = dict((k, v) for k, v in req.header_items()
-                           if k.lower() not in remove_headers)
+                           if k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,

From 3801d36416d6e3e6031dc4fcac01891ce7ddb55b Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Tue, 4 Jul 2023 14:03:39 -0500
Subject: [PATCH 248/312] [utils] `YoutubeDLCookieJar`: Add `get_cookie_header`
 and `get_cookies_for_url` methods

---
 youtube_dl/utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c21cd3687..ac6c81465 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2912,6 +2912,19 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
                 cookie.expires = None
                 cookie.discard = True
 
+    def get_cookie_header(self, url):
+        """Generate a Cookie HTTP header for a given url"""
+        cookie_req = sanitized_Request(url)
+        self.add_cookie_header(cookie_req)
+        return cookie_req.get_header('Cookie')
+
+    def get_cookies_for_url(self, url):
+        """Generate a list of Cookie objects for a given url"""
+        # Policy `_now` attribute must be set before calling `_cookies_for_request`
+        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
+        self._policy._now = self._now = int(time.time())
+        return self._cookies_for_request(sanitized_Request(url))
+
 
 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     def __init__(self, cookiejar=None):

From 8334ec961b802ad7ef8571b776c5fc727206dc9b Mon Sep 17 00:00:00 2001
From: Simon Sawicki <contact@grub4k.xyz>
Date: Tue, 4 Jul 2023 21:41:04 +0200
Subject: [PATCH 249/312] [core] Process header cookies on loading

---
 test/test_YoutubeDL.py          | 185 +++++++++++++++++++++++++++++++-
 test/test_YoutubeDLCookieJar.py |  14 +++
 youtube_dl/YoutubeDL.py         | 182 ++++++++++++++++++++++++++-----
 youtube_dl/downloader/common.py |   9 ++
 4 files changed, 357 insertions(+), 33 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 60780b8a7..6cf555827 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -10,14 +10,30 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import copy
+import json
 
-from test.helper import FakeYDL, assertRegexpMatches
+from test.helper import (
+    FakeYDL,
+    assertRegexpMatches,
+    try_rm,
+)
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_str, compat_urllib_error
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_cookies_SimpleCookie,
+    compat_kwargs,
+    compat_str,
+    compat_urllib_error,
+)
+
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.postprocessor.common import PostProcessor
-from youtube_dl.utils import ExtractorError, match_filter_func
+from youtube_dl.utils import (
+    ExtractorError,
+    match_filter_func,
+    traverse_obj,
+)
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -29,11 +45,14 @@ class YDL(FakeYDL):
         self.msgs = []
 
     def process_info(self, info_dict):
-        self.downloaded_info_dicts.append(info_dict)
+        self.downloaded_info_dicts.append(info_dict.copy())
 
     def to_screen(self, msg):
         self.msgs.append(msg)
 
+    def dl(self, *args, **kwargs):
+        assert False, 'Downloader must not be invoked for test_YoutubeDL'
+
 
 def _make_result(formats, **kwargs):
     res = {
@@ -42,8 +61,9 @@ def _make_result(formats, **kwargs):
         'title': 'testttitle',
         'extractor': 'testex',
         'extractor_key': 'TestEx',
+        'webpage_url': 'http://example.com/watch?v=shenanigans',
     }
-    res.update(**kwargs)
+    res.update(**compat_kwargs(kwargs))
     return res
 
 
@@ -1011,5 +1031,160 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(out_info['release_date'], '20210930')
 
 
+class TestYoutubeDLCookies(unittest.TestCase):
+
+    @staticmethod
+    def encode_cookie(cookie):
+        if not isinstance(cookie, dict):
+            cookie = vars(cookie)
+        for name, value in cookie.items():
+            yield name, compat_str(value)
+
+    @classmethod
+    def comparable_cookies(cls, cookies):
+        # Work around cookiejar cookies not being unicode strings
+        return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies))))
+
+    def assertSameCookies(self, c1, c2, msg=None):
+        return self.assertEqual(
+            *map(self.comparable_cookies, (c1, c2)),
+            msg=msg)
+
+    def assertSameCookieStrings(self, c1, c2, msg=None):
+        return self.assertSameCookies(
+            *map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)),
+            msg=msg)
+
+    def test_header_cookies(self):
+
+        ydl = FakeYDL()
+        ydl.report_warning = lambda *_, **__: None
+
+        def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+            return compat_http_cookiejar_Cookie(
+                version or 0, name, value, None, False,
+                domain, bool(domain), bool(domain), path, bool(path),
+                secure, expires, False, None, None, rest={})
+
+        test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s'))
+
+        def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None):
+            def _test():
+                ydl.cookiejar.clear()
+                ydl._load_cookies(encoded_cookies, autoscope=headers)
+                if headers:
+                    ydl._apply_header_cookies(test_url)
+                data = {'url': test_url}
+                ydl._calc_headers(data)
+                self.assertSameCookies(
+                    cookies, ydl.cookiejar,
+                    'Extracted cookiejar.Cookie is not the same')
+                if not headers:
+                    self.assertSameCookieStrings(
+                        data.get('cookies'), round_trip or encoded_cookies,
+                        msg='Cookie is not the same as round trip')
+                ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+            try:
+                _test()
+            except AssertionError:
+                raise
+            except Exception as e:
+                if not error_re:
+                    raise
+                assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2))
+
+        test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)])
+        test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed')
+        test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [
+            cookie('cookie1', 'value1', domain=test_domain, path='/test'),
+            cookie('cookie2', 'value2', domain=test_domain, path='/')])
+        cookie_kw = compat_kwargs(
+            {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', })
+        test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [
+            cookie('test', 'value', **cookie_kw)])
+        test('test="value; "; path=/test; domain=' + test_domain, [
+            cookie('test', 'value; ', domain=test_domain, path='/test')],
+            round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain))
+        test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)],
+             round_trip='name=""; Domain=' + test_domain)
+        test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True)
+        test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [],
+             headers=True, error_re='Invalid syntax')
+        ydl.report_warning = ydl.report_error
+        test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk')
+
+    def test_infojson_cookies(self):
+        TEST_FILE = 'test_infojson_cookies.info.json'
+        TEST_URL = 'https://example.com/example.mp4'
+        COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+        COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+        ydl = FakeYDL()
+        ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+        def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+            fmt = {'url': TEST_URL}
+            if fmts_header_cookies:
+                fmt['http_headers'] = COOKIE_HEADER
+            if cookies_field:
+                fmt['cookies'] = COOKIES
+            return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+        def test(initial_info, note):
+
+            def failure_msg(why):
+                return ' when '.join((why, note))
+
+            result = {}
+            result['processed'] = ydl.process_ie_result(initial_info)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=failure_msg('No cookies set in cookiejar after initial process'))
+            ydl.cookiejar.clear()
+            with open(TEST_FILE) as infojson:
+                result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+            result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=failure_msg('No cookies set in cookiejar after final process'))
+            ydl.cookiejar.clear()
+            for key in ('processed', 'loaded', 'final'):
+                info = result[key]
+                self.assertIsNone(
+                    traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+                    msg=failure_msg('Cookie header not removed in {0} result'.format(key)))
+                self.assertSameCookieStrings(
+                    traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+                    msg=failure_msg('No cookies field found in {0} result'.format(key)))
+
+        test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+        test(make_info(info_header_cookies=True), 'info_dict header cokies')
+        test(make_info(fmts_header_cookies=True), 'format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+        test(make_info(cookies_field=True), 'cookies format field')
+        test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+        try_rm(TEST_FILE)
+
+    def test_add_headers_cookie(self):
+        def check_for_cookie_header(result):
+            return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+        ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+        ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com
+
+        fmt = {'url': 'https://example.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+        self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+        self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+        fmt = {'url': 'https://wrong.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+        self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+        self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 05f48bd74..4f9dd71ae 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
         # will be ignored
         self.assertFalse(cookiejar._cookies)
 
+    def test_get_cookie_header(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+        self.assertIn('HTTPONLY_COOKIE', header)
+
+    def test_get_cookies_for_url(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
+        self.assertEqual(len(cookies), 2)
+        cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
+        self.assertFalse(cookies)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1435754c2..98d080f43 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
 
 import collections
 import contextlib
+import copy
 import datetime
 import errno
 import fileinput
@@ -34,10 +35,12 @@ from string import ascii_letters
 
 from .compat import (
     compat_basestring,
-    compat_cookiejar,
+    compat_collections_chain_map as ChainMap,
     compat_filter as filter,
     compat_get_terminal_size,
     compat_http_client,
+    compat_http_cookiejar_Cookie,
+    compat_http_cookies_SimpleCookie,
     compat_integer_types,
     compat_kwargs,
     compat_map as map,
@@ -53,6 +56,7 @@ from .compat import (
 from .utils import (
     age_restricted,
     args_to_str,
+    bug_reports_message,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -97,6 +101,7 @@ from .utils import (
     std_headers,
     str_or_none,
     subtitles_filename,
+    traverse_obj,
     UnavailableVideoError,
     url_basename,
     version_tuple,
@@ -376,6 +381,9 @@ class YoutubeDL(object):
         self.params.update(params)
         self.cache = Cache(self)
 
+        self._header_cookies = []
+        self._load_cookies_from_headers(self.params.get('http_headers'))
+
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
                 self.report_warning(
@@ -870,8 +878,83 @@ class YoutubeDL(object):
                     raise
         return wrapper
 
+    def _remove_cookie_header(self, http_headers):
+        """Filters out `Cookie` header from an `http_headers` dict
+        The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+
+        @param http_headers     An `http_headers` dict from which any `Cookie` header
+                                should be removed, or None
+        """
+        return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
+
+    def _load_cookies(self, data, **kwargs):
+        """Loads cookies from a `Cookie` header
+
+        This tries to work around the security vulnerability of passing cookies to every domain.
+
+        @param data         The Cookie header as a string to load the cookies from
+        @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
+                            If `True`, save cookies for later to be stored in the jar with a limited scope
+                            If a URL, save cookies in the jar with the domain of the URL
+        """
+        # autoscope=True (kw-only)
+        autoscope = kwargs.get('autoscope', True)
+
+        for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
+            if autoscope and any(cookie.values()):
+                raise ValueError('Invalid syntax in Cookie Header')
+
+            domain = cookie.get('domain') or ''
+            expiry = cookie.get('expires')
+            if expiry == '':  # 0 is valid so we check for `''` explicitly
+                expiry = None
+            prepared_cookie = compat_http_cookiejar_Cookie(
+                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+                bool(cookie.get('secure')), expiry, False, None, None, {})
+
+            if domain:
+                self.cookiejar.set_cookie(prepared_cookie)
+            elif autoscope is True:
+                self.report_warning(
+                    'Passing cookies as a header is a potential security risk; '
+                    'they will be scoped to the domain of the downloaded urls. '
+                    'Please consider loading cookies from a file or browser instead.',
+                    only_once=True)
+                self._header_cookies.append(prepared_cookie)
+            elif autoscope:
+                self.report_warning(
+                    'The extractor result contains an unscoped cookie as an HTTP header. '
+                    'If you are specifying an input URL, ' + bug_reports_message(),
+                    only_once=True)
+                self._apply_header_cookies(autoscope, [prepared_cookie])
+            else:
+                self.report_unscoped_cookies()
+
+    def _load_cookies_from_headers(self, headers):
+        self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
+
+    def _apply_header_cookies(self, url, cookies=None):
+        """This method applies stray header cookies to the provided url
+
+        This loads header cookies and scopes them to the domain provided in `url`.
+        While this is not ideal, it helps reduce the risk of them being sent to
+        an unintended destination.
+        """
+        parsed = compat_urllib_parse.urlparse(url)
+        if not parsed.hostname:
+            return
+
+        for cookie in map(copy.copy, cookies or self._header_cookies):
+            cookie.domain = '.' + parsed.hostname
+            self.cookiejar.set_cookie(cookie)
+
     @__handle_extraction_exceptions
     def __extract_info(self, url, ie, download, extra_info, process):
+        # Compat with passing cookies in http headers
+        self._apply_header_cookies(url)
+
         ie_result = ie.extract(url)
         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
             return
@@ -897,7 +980,7 @@ class YoutubeDL(object):
 
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
-        Take the result of the ie(may be modified) and resolve all unresolved
+        Take the result of the ie (may be modified) and resolve all unresolved
         references (URLs, playlist items).
 
         It will also download the videos if 'download'.
@@ -1468,23 +1551,45 @@ class YoutubeDL(object):
         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
         return _build_selector_function(parsed_selector)
 
-    def _calc_headers(self, info_dict):
-        res = std_headers.copy()
+    def _calc_headers(self, info_dict, load_cookies=False):
+        if load_cookies:  # For --load-info-json
+            # load cookies from http_headers in legacy info.json
+            self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
+                               autoscope=info_dict['url'])
+            # load scoped cookies from info.json
+            self._load_cookies(info_dict.get('cookies'), autoscope=False)
 
-        add_headers = info_dict.get('http_headers')
-        if add_headers:
-            res.update(add_headers)
-
-        cookies = self._calc_cookies(info_dict)
+        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
         if cookies:
-            res['Cookie'] = cookies
+            # Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
+            # By convention a cookie name can't be a well-known attribute name
+            # so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
+            encoder = compat_http_cookies_SimpleCookie()
+            values = []
+            attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
+            attributes = tuple([x[0].lower()] + list(x) for x in attributes)
+            for cookie in cookies:
+                _, value = encoder.value_encode(cookie.value)
+                # Py 2 '' --> '', Py 3 '' --> '""'
+                if value == '':
+                    value = '""'
+                values.append('='.join((cookie.name, value)))
+                for attr in attributes:
+                    value = getattr(cookie, attr[0], None)
+                    if value:
+                        values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
+            info_dict['cookies'] = '; '.join(values)
+
+        res = std_headers.copy()
+        res.update(info_dict.get('http_headers') or {})
+        res = self._remove_cookie_header(res)
 
         if 'X-Forwarded-For' not in res:
             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
             if x_forwarded_for_ip:
                 res['X-Forwarded-For'] = x_forwarded_for_ip
 
-        return res
+        return res or None
 
     def _calc_cookies(self, info_dict):
         pr = sanitized_Request(info_dict['url'])
@@ -1663,10 +1768,13 @@ class YoutubeDL(object):
                 format['protocol'] = determine_protocol(format)
             # Add HTTP headers, so that external programs can use them from the
             # json output
-            full_format_info = info_dict.copy()
-            full_format_info.update(format)
-            format['http_headers'] = self._calc_headers(full_format_info)
-        # Remove private housekeeping stuff
+            format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
+
+        # Safeguard against old/insecure infojson when using --load-info-json
+        info_dict['http_headers'] = self._remove_cookie_header(
+            info_dict.get('http_headers') or {}) or None
+
+        # Remove private housekeeping stuff (copied to http_headers in _calc_headers())
         if '__x_forwarded_for_ip' in info_dict:
             del info_dict['__x_forwarded_for_ip']
 
@@ -1927,17 +2035,9 @@ class YoutubeDL(object):
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
 
-        if self.params.get('writeinfojson', False):
-            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
-                self.to_screen('[info] Video description metadata is already present')
-            else:
-                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
-                try:
-                    write_json_file(self.filter_requested_info(info_dict), infofn)
-                except (OSError, IOError):
-                    self.report_error('Cannot write metadata to JSON file ' + infofn)
-                    return
+        self._write_info_json(
+            'video description', info_dict,
+            replace_extension(filename, 'info.json', info_dict.get('ext')))
 
         self._write_thumbnails(info_dict, filename)
 
@@ -1958,7 +2058,11 @@ class YoutubeDL(object):
                         fd.add_progress_hook(ph)
                     if self.params.get('verbose'):
                         self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
-                    return fd.download(name, info)
+
+                    new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
+                    new_info['http_headers'] = self._calc_headers(new_info)
+
+                    return fd.download(name, new_info)
 
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
@@ -2484,7 +2588,7 @@ class YoutubeDL(object):
         opts_proxy = self.params.get('proxy')
 
         if opts_cookiefile is None:
-            self.cookiejar = compat_cookiejar.CookieJar()
+            self.cookiejar = YoutubeDLCookieJar()
         else:
             opts_cookiefile = expand_path(opts_cookiefile)
             self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
@@ -2545,6 +2649,28 @@ class YoutubeDL(object):
             encoding = preferredencoding()
         return encoding
 
+    def _write_info_json(self, label, info_dict, infofn, overwrite=None):
+        if not self.params.get('writeinfojson', False):
+            return False
+
+        def msg(fmt, lbl):
+            return fmt % (lbl + ' metadata',)
+
+        if overwrite is None:
+            overwrite = not self.params.get('nooverwrites', False)
+
+        if not overwrite and os.path.exists(encodeFilename(infofn)):
+            self.to_screen(msg('[info] %s is already present', label.title()))
+            return 'exists'
+        else:
+            self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label))
+            try:
+                write_json_file(self.filter_requested_info(info_dict), infofn)
+                return True
+            except (OSError, IOError):
+                self.report_error(msg('Cannot write %s to JSON file ' + infofn, label))
+                return
+
     def _write_thumbnails(self, info_dict, filename):
         if self.params.get('writethumbnail', False):
             thumbnails = info_dict.get('thumbnails')
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index c86ce2aa5..08c98b336 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -13,7 +13,9 @@ from ..utils import (
     error_to_compat_str,
     format_bytes,
     shell_quote,
+    T,
     timeconvert,
+    traverse_obj,
 )
 
 
@@ -339,6 +341,10 @@ class FileDownloader(object):
     def download(self, filename, info_dict):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
+
+        This method filters the `Cookie` header from the info_dict to prevent leaks.
+        Downloaders have their own way of handling cookies.
+        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
         """
 
         nooverwrites_and_exists = (
@@ -373,6 +379,9 @@ class FileDownloader(object):
                     else '%.2f' % sleep_interval))
             time.sleep(sleep_interval)
 
+        info_dict['http_headers'] = dict(traverse_obj(info_dict, (
+            'http_headers', T(dict.items), lambda _, pair: pair[0].lower() != 'cookie'))) or None
+
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):

From 21438a4194376c3a9b1e5c322c825d43a1b03d6e Mon Sep 17 00:00:00 2001
From: bashonly <bashonly@bashonly.com>
Date: Tue, 4 Jul 2023 16:40:56 -0500
Subject: [PATCH 250/312] [downloader/external] Fix cookie support

---
 test/test_downloader_external.py  | 157 ++++++++++++++++++++++++++++--
 youtube_dl/downloader/common.py   |   5 -
 youtube_dl/downloader/external.py | 124 +++++++++++++++++++----
 3 files changed, 256 insertions(+), 30 deletions(-)

diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
index c0239502b..029f9b05f 100644
--- a/test/test_downloader_external.py
+++ b/test/test_downloader_external.py
@@ -12,20 +12,65 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import (
     FakeLogger,
+    FakeYDL,
     http_server_port,
     try_rm,
 )
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
-from youtube_dl.utils import encodeFilename
-from youtube_dl.downloader.external import Aria2pFD
+from youtube_dl.compat import (
+    compat_http_cookiejar_Cookie,
+    compat_http_server,
+    compat_kwargs,
+)
+from youtube_dl.utils import (
+    encodeFilename,
+    join_nonempty,
+)
+from youtube_dl.downloader.external import (
+    Aria2cFD,
+    Aria2pFD,
+    AxelFD,
+    CurlFD,
+    FFmpegFD,
+    HttpieFD,
+    WgetFD,
+)
 import threading
 
-TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-
-
 TEST_SIZE = 10 * 1024
 
+TEST_COOKIE = {
+    'version': 0,
+    'name': 'test',
+    'value': 'ytdlp',
+    'port': None,
+    'port_specified': False,
+    'domain': '.example.com',
+    'domain_specified': True,
+    'domain_initial_dot': False,
+    'path': '/',
+    'path_specified': True,
+    'secure': False,
+    'expires': None,
+    'discard': False,
+    'comment': None,
+    'comment_url': None,
+    'rest': {},
+}
+
+TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)
+
+TEST_INFO = {'url': 'http://www.example.com/'}
+
+
+def cookiejar_Cookie(**cookie_args):
+    return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args))
+
+
+def ifExternalFDAvailable(externalFD):
+    return unittest.skipUnless(externalFD.available(),
+                               externalFD.get_basename() + ' not found')
+
 
 class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
     def log_message(self, format, *args):
@@ -70,7 +115,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False, 'unrecognised server path'
 
 
-@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
+@ifExternalFDAvailable(Aria2pFD)
 class TestAria2pFD(unittest.TestCase):
     def setUp(self):
         self.httpd = compat_http_server.HTTPServer(
@@ -111,5 +156,103 @@ class TestAria2pFD(unittest.TestCase):
         })
 
 
+@ifExternalFDAvailable(HttpieFD)
+class TestHttpieFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = HttpieFD(ydl, {})
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['http', '--download', '--output', 'test', 'http://www.example.com/'])
+
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['http', '--download', '--output', 'test',
+                 'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE])
+
+
+@ifExternalFDAvailable(AxelFD)
+class TestAxelFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = AxelFD(ydl, {})
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['axel', '-o', 'test', '--', 'http://www.example.com/'])
+
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertEqual(
+                downloader._make_cmd('test', TEST_INFO),
+                ['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE,
+                 '--max-redirect=0', '--', 'http://www.example.com/'])
+
+
+@ifExternalFDAvailable(WgetFD)
+class TestWgetFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = WgetFD(ydl, {})
+            self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+            # Test cookiejar tempfile arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(CurlFD)
+class TestCurlFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = CurlFD(ydl, {})
+            self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+            # Test cookie header is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+            self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(Aria2cFD)
+class TestAria2cFD(unittest.TestCase):
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = Aria2cFD(ydl, {})
+            downloader._make_cmd('test', TEST_INFO)
+            self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
+
+            # Test cookiejar tempfile arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            cmd = downloader._make_cmd('test', TEST_INFO)
+            self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
+
+
+@ifExternalFDAvailable(FFmpegFD)
+class TestFFmpegFD(unittest.TestCase):
+    _args = []
+
+    def _test_cmd(self, args):
+        self._args = args
+
+    def test_make_cmd(self):
+        with FakeYDL() as ydl:
+            downloader = FFmpegFD(ydl, {})
+            downloader._debug_cmd = self._test_cmd
+            info_dict = TEST_INFO.copy()
+            info_dict['ext'] = 'mp4'
+
+            downloader._call_downloader('test', info_dict)
+            self.assertEqual(self._args, [
+                'ffmpeg', '-y', '-i', 'http://www.example.com/',
+                '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+            # Test cookies arg is added
+            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+            downloader._call_downloader('test', info_dict)
+            self.assertEqual(self._args, [
+                'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n',
+                '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 08c98b336..afb4ee33d 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -13,9 +13,7 @@ from ..utils import (
     error_to_compat_str,
     format_bytes,
     shell_quote,
-    T,
     timeconvert,
-    traverse_obj,
 )
 
 
@@ -379,9 +377,6 @@ class FileDownloader(object):
                     else '%.2f' % sleep_interval))
             time.sleep(sleep_interval)
 
-        info_dict['http_headers'] = dict(traverse_obj(info_dict, (
-            'http_headers', T(dict.items), lambda _, pair: pair[0].lower() != 'cookie'))) or None
-
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 1b6bd1fa2..7fc864e85 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -1,9 +1,10 @@
 from __future__ import unicode_literals
 
-import os.path
+import os
 import re
 import subprocess
 import sys
+import tempfile
 import time
 
 from .common import FileDownloader
@@ -23,6 +24,8 @@ from ..utils import (
     check_executable,
     is_outdated_version,
     process_communicate_or_kill,
+    T,
+    traverse_obj,
 )
 
 
@@ -30,6 +33,7 @@ class ExternalFD(FileDownloader):
     def real_download(self, filename, info_dict):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        self._cookies_tempfile = None
 
         try:
             started = time.time()
@@ -42,6 +46,13 @@ class ExternalFD(FileDownloader):
             # should take place
             retval = 0
             self.to_screen('[%s] Interrupted by user' % self.get_basename())
+        finally:
+            if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
+                try:
+                    os.remove(self._cookies_tempfile)
+                except OSError:
+                    self.report_warning(
+                        'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
 
         if retval == 0:
             status = {
@@ -97,6 +108,16 @@ class ExternalFD(FileDownloader):
     def _configuration_args(self, default=[]):
         return cli_configuration_args(self.params, 'external_downloader_args', default)
 
+    def _write_cookies(self):
+        if not self.ydl.cookiejar.filename:
+            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+            tmp_cookies.close()
+            self._cookies_tempfile = tmp_cookies.name
+            self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
+        # real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
+        self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
+        return self.ydl.cookiejar.filename or self._cookies_tempfile
+
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
         cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -110,13 +131,21 @@ class ExternalFD(FileDownloader):
             self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode
 
+    @staticmethod
+    def _header_items(info_dict):
+        return traverse_obj(
+            info_dict, ('http_headers', T(dict.items), Ellipsis))
+
 
 class CurlFD(ExternalFD):
     AVAILABLE_OPT = '-V'
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '--location', '-o', tmpfilename]
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['--cookie', cookie_header]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
         cmd += self._valueless_option('--silent', 'noprogress')
@@ -151,8 +180,11 @@ class AxelFD(ExternalFD):
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-o', tmpfilename]
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             cmd += ['-H', '%s: %s' % (key, val)]
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -162,8 +194,10 @@ class WgetFD(ExternalFD):
     AVAILABLE_OPT = '--version'
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies', self._write_cookies()]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._option('--limit-rate', 'ratelimit')
         retry = self._option('--tries', 'retries')
@@ -182,21 +216,58 @@ class WgetFD(ExternalFD):
 class Aria2cFD(ExternalFD):
     AVAILABLE_OPT = '-v'
 
+    @staticmethod
+    def _aria2c_filename(fn):
+        return fn if os.path.isabs(fn) else os.path.join('.', fn)
+
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c']
-        cmd += self._configuration_args([
-            '--min-split-size', '1M', '--max-connection-per-server', '4'])
-        dn = os.path.dirname(tmpfilename)
-        if dn:
-            cmd += ['--dir', dn]
-        cmd += ['--out', os.path.basename(tmpfilename)]
-        for key, val in info_dict['http_headers'].items():
+        cmd = [self.exe, '-c',
+               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
+        if 'fragments' in info_dict:
+            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+        else:
+            cmd += ['--min-split-size', '1M']
+
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies={0}'.format(self._write_cookies())]
+        for key, val in self._header_items(info_dict):
             cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._configuration_args(['--max-connection-per-server', '4'])
+        cmd += ['--out', os.path.basename(tmpfilename)]
+        cmd += self._option('--max-overall-download-limit', 'ratelimit')
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
-        cmd += ['--', info_dict['url']]
+        cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
+        cmd += self._configuration_args()
+
+        # aria2c strips out spaces from the beginning/end of filenames and paths.
+        # We work around this issue by adding a "./" to the beginning of the
+        # filename and relative path, and adding a "/" at the end of the path.
+        # See: https://github.com/yt-dlp/yt-dlp/issues/276
+        # https://github.com/ytdl-org/youtube-dl/issues/20312
+        # https://github.com/aria2/aria2/issues/1373
+        dn = os.path.dirname(tmpfilename)
+        if dn:
+            cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
+        if 'fragments' not in info_dict:
+            cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
+        cmd += ['--auto-file-renaming=false']
+        if 'fragments' in info_dict:
+            cmd += ['--file-allocation=none', '--uri-selector=inorder']
+            url_list_file = '%s.frag.urls' % (tmpfilename, )
+            url_list = []
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+                url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
+            stream, _ = self.sanitize_open(url_list_file, 'wb')
+            stream.write('\n'.join(url_list).encode())
+            stream.close()
+            cmd += ['-i', self._aria2c_filename(url_list_file)]
+        else:
+            cmd += ['--', info_dict['url']]
         return cmd
 
 
@@ -235,8 +306,10 @@ class Aria2pFD(ExternalFD):
         }
         options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
         options['out'] = os.path.basename(tmpfilename)
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            options['load-cookies'] = self._write_cookies()
         options['header'] = []
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             options['header'].append('{0}: {1}'.format(key, val))
         download = aria2.add_uris([info_dict['url']], options)
         status = {
@@ -265,8 +338,16 @@ class HttpieFD(ExternalFD):
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
-        for key, val in info_dict['http_headers'].items():
+        for key, val in self._header_items(info_dict):
             cmd += ['%s:%s' % (key, val)]
+
+        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+        # 2: https://httpie.io/docs/cli/sessions
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['Cookie:%s' % cookie_header]
         return cmd
 
 
@@ -312,7 +393,14 @@ class FFmpegFD(ExternalFD):
         # if end_time:
         #     args += ['-t', compat_str(end_time - start_time)]
 
-        if info_dict['http_headers'] and re.match(r'^https?://', url):
+        cookies = self.ydl.cookiejar.get_cookies_for_url(url)
+        if cookies:
+            args.extend(['-cookies', ''.join(
+                '{0}={1}; path={2}; domain={3};\r\n'.format(
+                    cookie.name, cookie.value, cookie.path, cookie.domain)
+                for cookie in cookies)])
+
+        if info_dict.get('http_headers') and re.match(r'^https?://', url):
             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
             headers = handle_youtubedl_headers(info_dict['http_headers'])

From 1634b1d61efa36c31c86b8c64c88dc297a7af28a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 11 Jul 2023 21:51:32 +0100
Subject: [PATCH 251/312] [doc] Warn against setting cookies with --add-header

---
 youtube_dl/options.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index d802b7e59..434f520d3 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -544,12 +544,14 @@ def parseOpts(overrideArguments=None):
     workarounds.add_option(
         '--referer',
         metavar='URL', dest='referer', default=None,
-        help='Specify a custom referer, use if the video access is restricted to one domain',
+        help='Specify a custom Referer: use if the video access is restricted to one domain',
     )
     workarounds.add_option(
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', action='append',
-        help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+        help=('Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times. '
+              'NB Use --cookies rather than adding a Cookie header if its contents may be sensitive; '
+              'data from a Cookie header will be sent to all domains, not just the one intended')
     )
     workarounds.add_option(
         '--bidi-workaround',

From 1d8d5a93f7187438587c3a754b53fdf30322cef0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 13 Jul 2023 20:14:50 +0100
Subject: [PATCH 252/312] [test] Fixes for old Pythons

---
 .github/workflows/ci.yml |  4 ++--
 test/helper.py           |  6 ++++++
 test/test_http.py        |  7 ++++++-
 test/test_utils.py       |  6 +++---
 youtube_dl/jsinterp.py   | 14 +++++++-------
 youtube_dl/utils.py      |  8 ++++++--
 6 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ce878c1b1..c3aabde47 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -301,7 +301,7 @@ jobs:
       if: ${{ matrix.python-version == '2.6' }}
       shell: bash
       run: |
-        # see pip for Jython
+        # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
         $PIP -qq show unittest2 || { \
           for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
               "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
@@ -312,7 +312,7 @@ jobs:
             $PIP install ${u##*/}; \
           done; }
         # make tests use unittest2
-        for test in ./test/test_*.py; do
+        for test in ./test/test_*.py ./test/helper.py; do
           sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
         done
     #-------- nose --------
diff --git a/test/helper.py b/test/helper.py
index e3314b03e..aa99001b2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -9,6 +9,7 @@ import re
 import types
 import ssl
 import sys
+import unittest
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
@@ -17,6 +18,7 @@ from youtube_dl.compat import (
     compat_str,
 )
 from youtube_dl.utils import (
+    IDENTITY,
     preferredencoding,
     write_string,
 )
@@ -298,3 +300,7 @@ def http_server_port(httpd):
     else:
         sock = httpd.socket
     return sock.getsockname()[1]
+
+
+def expectedFailureIf(cond):
+    return unittest.expectedFailure if cond else IDENTITY
diff --git a/test/test_http.py b/test/test_http.py
index cd180b51f..1a6b2e878 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import contextlib
 import gzip
 import io
 import ssl
@@ -154,7 +155,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 
         def gzip_compress(p):
             buf = io.BytesIO()
-            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+            with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:
                 f.write(p)
             return buf.getvalue()
 
@@ -306,6 +307,10 @@ class TestHTTP(unittest.TestCase):
             else self.https_port if scheme == 'https'
             else self.http_port, path)
 
+    @unittest.skipUnless(
+        sys.version_info >= (3, 2)
+        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 9)),
+        'No support for certificate check in SSL')
     def test_nocheckcertificate(self):
         with FakeYDL({'logger': FakeLogger()}) as ydl:
             with self.assertRaises(compat_urllib_error.URLError):
diff --git a/test/test_utils.py b/test/test_utils.py
index 1fc16ed05..2ee727caf 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1617,7 +1617,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
                               msg='exceptions in the query function should be catched')
         self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
@@ -1643,7 +1643,7 @@ Line 1
             with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
                 traverse_obj(_TEST_DATA, set())
             with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
-                traverse_obj(_TEST_DATA, {str.upper, str})
+                traverse_obj(_TEST_DATA, set((str.upper, str)))
 
         # Test `slice` as a key
         _SLICE_DATA = [0, 1, 2, 3, 4]
@@ -1779,7 +1779,7 @@ Line 1
                          {0: 100}, msg='type as expected_type should filter dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                          {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
-        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
+        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
                          1, msg='expected_type should not filter non final dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                          {0: {0: 100}}, msg='expected_type should transform deep dict values')
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 882432b80..86d902248 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -280,16 +280,16 @@ class JSInterpreter(object):
             # make Py 2.6 conform to its lying documentation
             if name == 'flags':
                 self.flags = self.__flags
+                return self.flags
             elif name == 'pattern':
                 self.pattern = self.__pattern_txt
+                return self.pattern
+            elif hasattr(self.__self, name):
+                v = getattr(self.__self, name)
+                setattr(self, name, v)
+                return v
             elif name in ('groupindex', 'groups'):
-                # in case these get set after a match?
-                if hasattr(self.__self, name):
-                    setattr(self, name, getattr(self.__self, name))
-                else:
-                    return 0 if name == 'groupindex' else {}
-            if hasattr(self, name):
-                return getattr(self, name)
+                return 0 if name == 'groupindex' else {}
             raise AttributeError('{0} has no attribute named {1}'.format(self, name))
 
         @classmethod
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ac6c81465..494f8341b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6198,7 +6198,8 @@ def traverse_obj(obj, *paths, **kwargs):
         elif isinstance(obj, compat_re_Match):
             result = None
             if isinstance(key, int) or casesense:
-                result = lookup_or_none(obj, key, getter=compat_re_Match.group)
+                # Py 2.6 doesn't have methods in the Match class/type
+                result = lookup_or_none(obj, key, getter=lambda _, k: obj.group(k))
 
             elif isinstance(key, str):
                 result = next((v for k, v in obj.groupdict().items()
@@ -6246,7 +6247,10 @@ def traverse_obj(obj, *paths, **kwargs):
 
             if __debug__ and callable(key):
                 # Verify function signature
-                inspect.getcallargs(key, None, None)
+                args = inspect.getargspec(key)
+                if len(args.args) != 2:
+                    # crash differently in 2.6 !
+                    inspect.getcallargs(key, None, None)
 
             new_objs = []
             for obj in objs:

From 47214e46d852e9d7ddf81d69a8e70806e2396c6c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 17 Jul 2023 20:39:11 +0100
Subject: [PATCH 253/312] [compat] Fix old Pythons broken loading of valueless
 cookie attributes

Cookie string parsing in Py 2.6.9, probably earlier, requires `=`.
Also 3.2, though the CPython code appears to be OK: 3.1 was also wrong.
---
 youtube_dl/compat.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index cd11ba5aa..1d784d90f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -126,12 +126,24 @@ except ImportError:  # Python 2
     import Cookie as compat_cookies
 compat_http_cookies = compat_cookies
 
-if sys.version_info[0] == 2:
+if sys.version_info[0] == 2 or sys.version_info < (3, 3):
     class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
         def load(self, rawdata):
-            if isinstance(rawdata, compat_str):
-                rawdata = str(rawdata)
-            return super(compat_cookies_SimpleCookie, self).load(rawdata)
+            must_have_value = 0
+            if not isinstance(rawdata, dict):
+                if sys.version_info[:2] != (2, 7):
+                    # attribute must have value for parsing
+                    rawdata, must_have_value = re.subn(
+                        r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
+                if sys.version_info[0] == 2:
+                    if isinstance(rawdata, compat_str):
+                        rawdata = str(rawdata)
+            super(compat_cookies_SimpleCookie, self).load(rawdata)
+            if must_have_value > 0:
+                for morsel in self.values():
+                    for attr in ('secure', 'httponly'):
+                        if morsel.get(attr):
+                            morsel[attr] = True
 else:
     compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
 compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie

From 825a40744bf9aeb743452db24e43d3eb61feb6c2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 12:40:09 +0100
Subject: [PATCH 254/312] [utils] Align traverse_obj() with yt-dlp

Thanks Grub4k for these:
* traverse `Iterable`s, from https://github.com/yt-dlp/yt-dlp/pull/6902, etc
* traverse `set` key for transformations/filters, `re.Match` group names, from
  https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec, etc
* traverse `re.Match`es, from https://github.com/yt-dlp/yt-dlp/pull/5174
* always return list when branching, from https://github.com/yt-dlp/yt-dlp/pull/5170
---
 test/test_utils.py  | 37 +++++++++++++++++++++----------------
 youtube_dl/utils.py |  9 ++-------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 2ee727caf..1b5d170fe 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -20,7 +20,7 @@ import xml.etree.ElementTree
 from youtube_dl.utils import (
     age_restricted,
     args_to_str,
-    encode_base_n,
+    base_url,
     caesar,
     clean_html,
     clean_podcast_url,
@@ -29,10 +29,12 @@ from youtube_dl.utils import (
     detect_exe_version,
     determine_ext,
     dict_get,
+    encode_base_n,
     encode_compat_str,
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    expand_path,
     extract_attributes,
     ExtractorError,
     find_xpath_attr,
@@ -51,6 +53,7 @@ from youtube_dl.utils import (
     js_to_json,
     LazyList,
     limit_length,
+    lowercase_escape,
     merge_dicts,
     mimetype2ext,
     month_by_name,
@@ -66,17 +69,16 @@ from youtube_dl.utils import (
     parse_resolution,
     parse_bitrate,
     pkcs1pad,
-    read_batch_urls,
-    sanitize_filename,
-    sanitize_path,
-    sanitize_url,
-    expand_path,
     prepend_extension,
-    replace_extension,
+    read_batch_urls,
     remove_start,
     remove_end,
     remove_quotes,
+    replace_extension,
     rot47,
+    sanitize_filename,
+    sanitize_path,
+    sanitize_url,
     shell_quote,
     smuggle_url,
     str_or_none,
@@ -93,10 +95,8 @@ from youtube_dl.utils import (
     unified_timestamp,
     unsmuggle_url,
     uppercase_escape,
-    lowercase_escape,
     url_basename,
     url_or_none,
-    base_url,
     urljoin,
     urlencode_postdata,
     urshift,
@@ -1586,6 +1586,11 @@ Line 1
             'dict': {},
         }
 
+        # define a pukka Iterable
+        def iter_range(stop):
+            for from_ in range(stop):
+                yield from_
+
         # Test base functionality
         self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
                          msg='allow tuple path')
@@ -1602,13 +1607,13 @@ Line 1
         # Test Ellipsis behavior
         self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
                               (item for item in _TEST_DATA.values() if item not in (None, {})),
-                              msg='`...` should give all non discarded values')
+                              msg='`...` should give all non-discarded values')
         self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
                               msg='`...` selection for dicts should select all values')
         self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
                          ['https://www.example.com/0', 'https://www.example.com/1'],
                          msg='nested `...` queries should work')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
                               msg='`...` query result should be flattened')
         self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
                          msg='`...` should accept iterables')
@@ -1618,7 +1623,7 @@ Line 1
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
         self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
-                              msg='exceptions in the query function should be catched')
+                              msg='exceptions in the query function should be caught')
         self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
         if __debug__:
@@ -1706,7 +1711,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
                          msg='remove empty values when dict key')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
-                         msg='use `default` when dict key and `default`')
+                         msg='use `default` when dict key and a default')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
                          msg='remove empty values when nested dict key fails')
         self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
@@ -1768,7 +1773,7 @@ Line 1
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
                          'str', msg='accept matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
-                         None, msg='reject non matching `expected_type` type')
+                         None, msg='reject non-matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
                          '0', msg='transform type using type function')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
@@ -1780,7 +1785,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                          {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
-                         1, msg='expected_type should not filter non final dict values')
+                         1, msg='expected_type should not filter non-final dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                          {0: {0: 100}}, msg='expected_type should transform deep dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
@@ -1838,7 +1843,7 @@ Line 1
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
                                       _traverse_string=True), 'sr',
                          msg='`slice` should result in string if `traverse_string`')
-        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
                                       _traverse_string=True), 'str',
                          msg='function should result in string if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 494f8341b..b77a7fb0e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4268,13 +4268,8 @@ def variadic(x, allowed_types=NO_DEFAULT):
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
-    if isinstance(key_or_keys, (list, tuple)):
-        for key in key_or_keys:
-            if key not in d or d[key] is None or skip_false_values and not d[key]:
-                continue
-            return d[key]
-        return default
-    return d.get(key_or_keys, default)
+    exp = (lambda x: x or None) if skip_false_values else IDENTITY
+    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp, default=default)
 
 
 def try_call(*funcs, **kwargs):

From d9d07a95815a992bf5f876a62f25c831eb3f32ac Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 12:06:34 +0100
Subject: [PATCH 255/312] [utils] Improve js_to_json, align with yt-dlp *
 support variable substitution, from
 https://github.com/yt-dlp/yt-dlp/pull/#521 etc,   thanks ChillingPepper,
 Grub4k, pukkandan * improve escape handling, from
 https://github.com/yt-dlp/yt-dlp/pull/#521   thanks Grub4k * support template
 strings from https://github.com/yt-dlp/yt-dlp/pull/6623   thanks Grub4k * add
 limited `!` evaluation (eg, !!0 -> false, see tests)

---
 test/test_utils.py  | 103 ++++++++++++++++++++++++++++++++++++++--
 youtube_dl/utils.py | 112 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 186 insertions(+), 29 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 1b5d170fe..e83977f29 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -905,6 +905,85 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
+    def test_js_to_json_vars_strings(self):
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'null': a,
+                    'nullStr': b,
+                    'true': c,
+                    'trueStr': d,
+                    'false': e,
+                    'falseStr': f,
+                    'unresolvedVar': g,
+                }''',
+                {
+                    'a': 'null',
+                    'b': '"null"',
+                    'c': 'true',
+                    'd': '"true"',
+                    'e': 'false',
+                    'f': '"false"',
+                    'g': 'var',
+                }
+            )),
+            {
+                'null': None,
+                'nullStr': 'null',
+                'true': True,
+                'trueStr': 'true',
+                'false': False,
+                'falseStr': 'false',
+                'unresolvedVar': 'var'
+            }
+        )
+
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'int': a,
+                    'intStr': b,
+                    'float': c,
+                    'floatStr': d,
+                }''',
+                {
+                    'a': '123',
+                    'b': '"123"',
+                    'c': '1.23',
+                    'd': '"1.23"',
+                }
+            )),
+            {
+                'int': 123,
+                'intStr': '123',
+                'float': 1.23,
+                'floatStr': '1.23',
+            }
+        )
+
+        self.assertDictEqual(
+            json.loads(js_to_json(
+                '''{
+                    'object': a,
+                    'objectStr': b,
+                    'array': c,
+                    'arrayStr': d,
+                }''',
+                {
+                    'a': '{}',
+                    'b': '"{}"',
+                    'c': '[]',
+                    'd': '"[]"',
+                }
+            )),
+            {
+                'object': {},
+                'objectStr': '{}',
+                'array': [],
+                'arrayStr': '[]',
+            }
+        )
+
     def test_js_to_json_realworld(self):
         inp = '''{
             'clip':{'provider':'pseudo'}
@@ -975,10 +1054,10 @@ class TestUtil(unittest.TestCase):
             !42: 42
         }''')
         self.assertEqual(json.loads(on), {
-            'a': 0,
-            'b': 1,
-            'c': 0,
-            'd': 42.42,
+            'a': True,
+            'b': False,
+            'c': False,
+            'd': True,
             'e': [],
             'f': "abc",
             'g': "",
@@ -1048,10 +1127,26 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{ "040": "040" }')
         self.assertEqual(json.loads(on), {'040': '040'})
 
+        on = js_to_json('[1,//{},\n2]')
+        self.assertEqual(json.loads(on), [1, 2])
+
+        on = js_to_json(r'"\^\$\#"')
+        self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+        on = js_to_json('\'"\\""\'')
+        self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
 
+    def test_js_to_json_template_literal(self):
+        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+        self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+        self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b77a7fb0e..b05f65283 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4365,46 +4365,108 @@ def strip_jsonp(code):
         r'\g<callback_data>', code)
 
 
-def js_to_json(code):
-    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+def js_to_json(code, *args, **kwargs):
+
+    # vars is a dict of (var, val) pairs to substitute
+    vars = args[0] if len(args) > 0 else kwargs.get('vars', {})
+    strict = kwargs.get('strict', False)
+
+    STRING_QUOTES = '\'"`'
+    STRING_RE = '|'.join(r'{0}(?:\\.|[^\\{0}])*{0}'.format(q) for q in STRING_QUOTES)
+    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
     INTEGER_TABLE = (
         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+        (r'(?s)^(\d+){skip}:?$'.format(skip=SKIP_RE), 10),
     )
+    # compat candidate
+    JSONDecodeError = json.JSONDecodeError if 'JSONDecodeError' in dir(json) else ValueError
+
+    def process_escape(match):
+        JSON_PASSTHROUGH_ESCAPES = r'"\bfnrtu'
+        escape = match.group(1) or match.group(2)
+
+        return ('\\' + escape if escape in JSON_PASSTHROUGH_ESCAPES
+                else '\\u00' if escape == 'x'
+                else '' if escape == '\n'
+                else escape)
+
+    def template_substitute(match):
+        evaluated = js_to_json(match.group(1), vars, strict=strict)
+        if evaluated[0] == '"':
+            return json.loads(evaluated)
+        return evaluated
 
     def fix_kv(m):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
             return v
-        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
-            return ""
+        elif v in ('undefined', 'void 0'):
+            return 'null'
+        elif v.startswith('/*') or v.startswith('//') or v == ',':
+            return ''
 
-        if v[0] in ("'", '"'):
-            v = re.sub(r'(?s)\\.|"', lambda m: {
-                '"': '\\"',
-                "\\'": "'",
-                '\\\n': '',
-                '\\x': '\\u00',
-            }.get(m.group(0), m.group(0)), v[1:-1])
-        else:
-            for regex, base in INTEGER_TABLE:
-                im = re.match(regex, v)
-                if im:
-                    i = int(im.group(1), base)
-                    return '"%d":' % i if v.endswith(':') else '%d' % i
+        if v[0] in STRING_QUOTES:
+            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
+            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
+            return '"{0}"'.format(escaped)
 
-        return '"%s"' % v
+        inv = IDENTITY
+        im = re.split(r'^!+', v)
+        if len(im) > 1 and not im[-1].endswith(':'):
+            if (len(v) - len(im[1])) % 2 == 1:
+                inv = lambda x: 'true' if x == 0 else 'false'
+            else:
+                inv = lambda x: 'false' if x == 0 else 'true'
+        if not any(x for x in im):
+            return
+        v = im[-1]
+
+        for regex, base in INTEGER_TABLE:
+            im = re.match(regex, v)
+            if im:
+                i = int(im.group(1), base)
+                return ('"%s":' if v.endswith(':') else '%s') % inv(i)
+
+        if v in vars:
+            try:
+                if not strict:
+                    json.loads(vars[v])
+            except JSONDecodeError:
+                return inv(json.dumps(vars[v]))
+            else:
+                return inv(vars[v])
+
+        if not strict:
+            v = try_call(inv, args=(v,), default=v)
+            if v in ('true', 'false'):
+                return v
+            return '"{0}"'.format(v)
+
+        raise ValueError('Unknown value: ' + v)
+
+    def create_map(mobj):
+        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
+    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
+    if not strict:
+        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
+        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
+        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
 
     return re.sub(r'''(?sx)
-        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
-        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
-        {comment}|,(?={skip}[\]}}])|
-        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
-        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
-        [0-9]+(?={skip}:)|
+        {str_}|
+        {comment}|
+        ,(?={skip}[\]}}])|
+        void\s0|
+        !*(?:(?<!\d)[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
+        (?:\b|!+)0(?:[xX][\da-fA-F]+|[0-7]+)(?:{skip}:)?|
+        !+\d+(?:\.\d*)?(?:{skip}:)?|
+        [0-9]+(?:{skip}:)|
         !+
-        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
+        '''.format(comment=COMMENT_RE, skip=SKIP_RE, str_=STRING_RE), fix_kv, code)
 
 
 def qualities(quality_ids):

From cb9366eda584fde2421140adf994eadc5bb6b943 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:54:52 +0100
Subject: [PATCH 256/312] [utils] Minor updates (merge_dicts, T)

A couple of mods to ease yt-dlp back-ports:
* add kwargs to merge_dicts:
  `unblank=True` (disallow empty string), `rev=False` (reverse the merge list)
* add `T(x)` shortcut for `{x}`, unsupported in Py2.6
---
 youtube_dl/utils.py | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b05f65283..0cbbec0f3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4269,7 +4269,8 @@ def variadic(x, allowed_types=NO_DEFAULT):
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     exp = (lambda x: x or None) if skip_false_values else IDENTITY
-    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp, default=default)
+    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp,
+                        default=default, get_all=False)
 
 
 def try_call(*funcs, **kwargs):
@@ -4302,16 +4303,38 @@ def try_get(src, getter, expected_type=None):
                 return v
 
 
-def merge_dicts(*dicts):
+def merge_dicts(*dicts, **kwargs):
+    """
+        Merge the `dict`s in `dicts` using the first valid value for each key.
+        Normally valid: not None and not an empty string
+
+        Keyword-only args:
+        unblank:    allow empty string if False (default True)
+        rev:        merge dicts in reverse order (default False)
+
+        merge_dicts(dct1, dct2, ..., unblank=False, rev=True)
+        matches {**dct1, **dct2, ...}
+
+        However, merge_dicts(dct1, dct2, ..., rev=True) may often be better.
+    """
+
+    unblank = kwargs.get('unblank', True)
+    rev = kwargs.get('rev', False)
+
+    if unblank:
+        def can_merge_str(k, v, to_dict):
+            return (isinstance(v, compat_str) and v
+                    and isinstance(to_dict[k], compat_str)
+                    and not to_dict[k])
+    else:
+        can_merge_str = lambda k, v, to_dict: False
+
     merged = {}
-    for a_dict in dicts:
+    for a_dict in reversed(dicts) if rev else dicts:
         for k, v in a_dict.items():
             if v is None:
                 continue
-            if (k not in merged
-                    or (isinstance(v, compat_str) and v
-                        and isinstance(merged[k], compat_str)
-                        and not merged[k])):
+            if (k not in merged) or can_merge_str(k, v, merged):
                 merged[k] = v
     return merged
 

From 1e8ccdd2eb77901e18feb8a9d48e62d11651cd1e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 13:08:58 +0100
Subject: [PATCH 257/312] [InfoExtractor] Support groups in _`search_regex()`,
 etc

---
 youtube_dl/extractor/common.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7244e5df6..dbdf456f5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1005,6 +1005,8 @@ class InfoExtractor(object):
             if group is None:
                 # return the first matching group
                 return next(g for g in mobj.groups() if g is not None)
+            elif isinstance(group, (list, tuple)):
+                return tuple(mobj.group(g) for g in group)
             else:
                 return mobj.group(group)
         elif default is not NO_DEFAULT:
@@ -1020,10 +1022,9 @@ class InfoExtractor(object):
         Like _search_regex, but strips HTML tags and unescapes entities.
         """
         res = self._search_regex(pattern, string, name, default, fatal, flags, group)
-        if res:
-            return clean_html(res).strip()
-        else:
-            return res
+        if isinstance(res, tuple):
+            return tuple(map(clean_html, res))
+        return clean_html(res)
 
     def _get_netrc_login_info(self, netrc_machine=None):
         username = None

From 4566e6e53ebd87c6c548a8414ab5bd742c14c2b0 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:02:25 +0100
Subject: [PATCH 258/312] [GlobalPlayer] Add site extractors back-ported from
 yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6903, thanks garret1317
---
 youtube_dl/extractor/extractors.py   |  15 +-
 youtube_dl/extractor/globalplayer.py | 285 +++++++++++++++++++++++++++
 2 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 youtube_dl/extractor/globalplayer.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3a87f9e33..811a2605a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -444,6 +444,13 @@ from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
 from .giga import GigaIE
 from .glide import GlideIE
+from .globalplayer import (
+    GlobalPlayerLiveIE,
+    GlobalPlayerLivePlaylistIE,
+    GlobalPlayerAudioIE,
+    GlobalPlayerAudioEpisodeIE,
+    GlobalPlayerVideoIE
+)
 from .globo import (
     GloboIE,
     GloboArticleIE,
@@ -975,6 +982,10 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)
 from .puhutv import (
     PuhuTVIE,
     PuhuTVSerieIE,
@@ -1678,7 +1689,3 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
-from .pr0gramm import (
-    Pr0grammIE,
-    Pr0grammStaticIE,
-)
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
new file mode 100644
index 000000000..cceab9e6a
--- /dev/null
+++ b/youtube_dl/extractor/globalplayer.py
@@ -0,0 +1,285 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    join_nonempty,
+    merge_dicts,
+    parse_duration,
+    str_or_none,
+    T,
+    traverse_obj,
+    unified_strdate,
+    unified_timestamp,
+    urlhandle_detect_ext,
+)
+
+
+class GlobalPlayerBaseIE(InfoExtractor):
+
+    import re
+
+    @classmethod
+    def _match_valid_url(cls, url):
+        return cls.re.match(cls._VALID_URL, url)
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
+    def _get_page_props(self, url, video_id):
+        webpage = self._download_webpage(url, video_id)
+        return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
+
+    def _request_ext(self, url, video_id):
+        return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
+            url, video_id, note='Determining source extension'))
+
+    def _extract_audio(self, episode, series):
+
+        def clean_desc(x):
+            x = clean_html(x)
+            if x:
+                x = x.replace('\xa0', ' ')
+            return x
+
+        return merge_dicts({
+            'vcodec': 'none',
+        }, traverse_obj(series, {
+            'series': 'title',
+            'series_id': 'id',
+            'thumbnail': 'imageUrl',
+            'uploader': 'itunesAuthor',  # podcasts only
+        }), traverse_obj(episode, {
+            'id': 'id',
+            'description': ('description', T(clean_desc)),
+            'duration': ('duration', T(parse_duration)),
+            'thumbnail': 'imageUrl',
+            'url': 'streamUrl',
+            'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
+            'title': 'title',
+        }, get_all=False), rev=True)
+
+
+class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
+        'info_dict': {
+            'id': '2mx1E',
+            'ext': 'aac',
+            'display_id': 'smoothchill-uk',
+            'title': 're:^Smooth Chill.+$',
+            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
+            'description': 'Music To Chill To',
+            # 'live_status': 'is_live',
+            'is_live': True,
+        },
+    }, {
+        # national station
+        'url': 'https://www.globalplayer.com/live/heart/uk/',
+        'info_dict': {
+            'id': '2mwx4',
+            'ext': 'aac',
+            'description': 'turn up the feel good!',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'title': 're:^Heart UK.+$',
+            'display_id': 'heart-uk',
+        },
+    }, {
+        # regional variation
+        'url': 'https://www.globalplayer.com/live/heart/london/',
+        'info_dict': {
+            'id': 'AMqg',
+            'ext': 'aac',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'title': 're:^Heart London.+$',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'display_id': 'heart-london',
+            'description': 'turn up the feel good!',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['station']
+        stream_url = station['streamUrl']
+
+        return merge_dicts({
+            'id': station['id'],
+            'display_id': (
+                join_nonempty('brandSlug', 'slug', from_dict=station)
+                or station.get('legacyStationPrefix')),
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+        }, {
+            'title': self._live_title(traverse_obj(
+                station, (('name', 'brandName'), T(str_or_none)),
+                get_all=False)),
+        }, traverse_obj(station, {
+            'description': 'tagline',
+            'thumbnail': 'brandLogo',
+        }), rev=True)
+
+
+class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
+    _TESTS = [{
+        # "live playlist"
+        'url': 'https://www.globalplayer.com/playlists/8bLk/',
+        'info_dict': {
+            'id': '8bLk',
+            'ext': 'aac',
+            # 'live_status': 'is_live',
+            'is_live': True,
+            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
+            'title': 're:^Classic FM Hall of Fame.+$'
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['playlistData']
+        stream_url = station['streamUrl']
+
+        return merge_dicts({
+            'id': video_id,
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+        }, traverse_obj(station, {
+            'title': 'title',
+            'description': 'description',
+            'thumbnail': 'image',
+        }), rev=True)
+
+
+class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': '42KuaM',
+            'title': 'Filthy Ritual',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'categories': ['Society & Culture', 'True Crime'],
+            'uploader': 'Global',
+            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        series = props['podcastInfo'] if podcast else props['catchupInfo']
+
+        return merge_dicts({
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
+                        series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
+            'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
+        }, traverse_obj(series, {
+            'description': 'description',
+            'thumbnail': 'imageUrl',
+            'title': 'title',
+            'uploader': 'itunesAuthor',  # podcasts only
+        }), rev=True)
+
+
+class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
+        'info_dict': {
+            'id': '7DrfNnE',
+            'ext': 'mp3',
+            'title': 'Filthy Ritual - Trailer',
+            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'duration': 225.0,
+            'timestamp': 1681254900,
+            'series': 'Filthy Ritual',
+            'series_id': '42KuaM',
+            'upload_date': '20230411',
+            'uploader': 'Global',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
+        'only_matching': True,
+        # expired: refresh the details with a current show for a full test
+        'info_dict': {
+            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
+            'ext': 'm4a',
+            'timestamp': 1682056800,
+            'series': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+            'upload_date': '20230421',
+            'series_id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'duration': 10800.0,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
+
+        return self._extract_audio(
+            episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
+
+
+class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
+        'info_dict': {
+            'id': '2JsSZ7Gm2uP',
+            'ext': 'mp4',
+            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
+            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
+            'upload_date': '20230420',
+            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._get_page_props(url, video_id)['videoData']
+
+        return merge_dicts({
+            'id': video_id,
+        }, traverse_obj(meta, {
+            'url': 'url',
+            'thumbnail': ('image', 'url'),
+            'title': 'title',
+            'upload_date': ('publish_date', T(unified_strdate)),
+            'description': 'description',
+        }), rev=True)

From eaaf4c6736b98e20a1923162ae05952c8cb51ee1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:04:46 +0100
Subject: [PATCH 259/312] [Whyp] Add extractor back-ported from yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6803, thanks CoryTibbettsDev
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/whyp.py       | 78 ++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 youtube_dl/extractor/whyp.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 811a2605a..9f247dbbf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1576,6 +1576,7 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .whyp import WhypIE
 from .wistia import (
     WistiaIE,
     WistiaPlaylistIE,
diff --git a/youtube_dl/extractor/whyp.py b/youtube_dl/extractor/whyp.py
new file mode 100644
index 000000000..16f9154ad
--- /dev/null
+++ b/youtube_dl/extractor/whyp.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    merge_dicts,
+    str_or_none,
+    T,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class WhypIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
+        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
+        'info_dict': {
+            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
+            'id': '18337',
+            'title': 'Home Page Example Track',
+            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'ext': 'mp3',
+            'duration': 52.82,
+            'uploader': 'Brad',
+            'uploader_id': '1',
+            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
+        },
+    }, {
+        'url': 'https://www.whyp.it/tracks/18337',
+        'only_matching': True,
+    }]
+
+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', fatal=True, traverse=('data', 0)):
+        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
+
+        import functools
+        import json
+        import re
+        from ..utils import (js_to_json, NO_DEFAULT)
+
+        re_ctx = re.escape(context_name)
+        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
+        js, arg_keys, arg_vals = self._search_regex(
+            (p.format(re_ctx, FUNCTION_RE) for p in (r'<script>\s*window\.{0}={1}\s*\)\s*;?\s*</script>', r'{0}\(.*?{1}')),
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}
+
+        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
+            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
+
+        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+        return traverse_obj(ret, traverse) or {}
+
+    def _real_extract(self, url):
+        unique_id = self._match_id(url)
+        webpage = self._download_webpage(url, unique_id)
+        data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
+
+        return merge_dicts({
+            'url': data['audio_url'],
+            'id': unique_id,
+        }, traverse_obj(data, {
+            'title': 'title',
+            'description': 'description',
+            'duration': ('duration', T(float_or_none)),
+            'uploader': ('user', 'username'),
+            'uploader_id': ('user', 'id', T(str_or_none)),
+            'thumbnail': ('artwork_url', T(url_or_none)),
+        }), {
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'http_headers': {'Referer': 'https://whyp.it/'},
+        }, rev=True)

From 4339910df3fe97a054069cb98da594dd1b50c13a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 3 May 2023 10:07:35 +0100
Subject: [PATCH 260/312] [DLF] Add site extractors back-ported from yt-dlp

* from https://github.com/yt-dlp/yt-dlp/pull/6697, thanks nick-cd
---
 youtube_dl/extractor/dlf.py        | 204 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   4 +
 2 files changed, 208 insertions(+)
 create mode 100644 youtube_dl/extractor/dlf.py

diff --git a/youtube_dl/extractor/dlf.py b/youtube_dl/extractor/dlf.py
new file mode 100644
index 000000000..cc3de4582
--- /dev/null
+++ b/youtube_dl/extractor/dlf.py
@@ -0,0 +1,204 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    int_or_none,
+    merge_dicts,
+    traverse_obj,
+    url_or_none,
+    variadic,
+)
+
+
+class DLFBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
+    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
+
+    def _parse_button_attrs(self, button, audio_id=None):
+        attrs = extract_attributes(button)
+        audio_id = audio_id or attrs['data-audio-diraid']
+
+        url = traverse_obj(
+            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
+            'data-audio-src', expected_type=url_or_none)
+        ext = determine_ext(url)
+        formats = (self._extract_m3u8_formats(url, audio_id, fatal=False)
+                   if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
+        self._sort_formats(formats)
+
+        def traverse_attrs(path):
+            path = list(variadic(path))
+            t = path.pop() if callable(path[-1]) else None
+            return traverse_obj(attrs, path, expected_type=t, get_all=False)
+
+        def txt_or_none(v, default=None):
+            return default if v is None else (compat_str(v).strip() or default)
+
+        return merge_dicts(*reversed([{
+            'id': audio_id,
+            # 'extractor_key': DLFIE.ie_key(),
+            # 'extractor': DLFIE.IE_NAME,
+            'formats': formats,
+        }, dict((k, traverse_attrs(v)) for k, v in {
+            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none),
+            'duration': (('data-audioduration', 'data-audio-duration'), int_or_none),
+            'thumbnail': ('data-audioimage', url_or_none),
+            'uploader': 'data-audio-producer',
+            'series': 'data-audio-series',
+            'channel': 'data-audio-origin-site-name',
+            'webpage_url': ('data-audio-download-tracking-path', url_or_none),
+        }.items())]))
+
+
+class DLFIE(DLFBaseIE):
+    IE_NAME = 'dlf'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
+    _TESTS = [
+        # Audio as an HLS stream
+        {
+            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
+            'info_dict': {
+                'id': '03a3eb19',
+                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
+                'ext': 'm4a',
+                'duration': 3298,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'On Stage',
+                'channel': 'deutschlandfunk'
+            },
+            'params': {
+                'skip_download': 'm3u8'
+            },
+            'skip': 'This webpage no longer exists'
+        }, {
+            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
+            'info_dict': {
+                'id': 'd9cc1856',
+                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
+                'ext': 'mp3',
+                'duration': 291,
+                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
+                'uploader': 'Deutschlandfunk',
+                'series': 'Kommentare und Themen der Woche',
+                'channel': 'deutschlandfunk'
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        webpage = self._download_webpage(url, audio_id)
+
+        return self._parse_button_attrs(
+            self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
+
+
+class DLFCorpusIE(DLFBaseIE):
+    IE_NAME = 'dlf:corpus'
+    IE_DESC = 'DLF Multi-feed Archives'
+    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
+    _TESTS = [
+        # Recorded news broadcast with referrals to related broadcasts
+        {
+            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
+            'info_dict': {
+                'id': 'fechten-russland-belarus-ukraine-protest-100',
+                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
+            },
+            'playlist_mincount': 5,
+            'playlist': [{
+                'info_dict': {
+                    'id': '1fc5d64a',
+                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
+                    'ext': 'mp3',
+                    'duration': 252,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '2ada145f',
+                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
+                    'ext': 'mp3',
+                    'duration': 336,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Deutschlandfunk Nova',
+                    'channel': 'deutschlandfunk-nova'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '47e1a096',
+                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
+                    'ext': 'mp3',
+                    'duration': 602,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }, {
+                'info_dict': {
+                    'id': '5e55e8c9',
+                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
+                    'ext': 'mp3',
+                    'duration': 187,
+                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
+                    'uploader': 'Deutschlandfunk',
+                    'series': 'Sport am Samstag',
+                    'channel': 'deutschlandfunk'
+                }
+            }]
+        },
+        # Podcast feed with tag buttons, playlist count fluctuates
+        {
+            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
+            'info_dict': {
+                'id': 'kommentare-und-themen-der-woche-100',
+                'title': 'Meinung - Kommentare und Themen der Woche',
+                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
+            },
+            'playlist_mincount': 10,
+        },
+        # Podcast feed with no description
+        {
+            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
+            'info_dict': {
+                'id': 'podcast-tolle-idee-100',
+                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
+            },
+            'playlist_mincount': 11,
+        },
+    ]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return self.playlist_result(
+            map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
+            playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+            self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage, default=None))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9f247dbbf..be73c0665 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -295,6 +295,10 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .democracynow import DemocracynowIE
+from .dlf import (
+    DLFCorpusIE,
+    DLFIE,
+)
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE

From 846522204104e3078c597fa1872465024a684ad6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 4 May 2023 00:08:26 +0100
Subject: [PATCH 261/312] [Clipchamp] Add new extractor back-ported from yt-dlp

---
 youtube_dl/extractor/clipchamp.py  | 76 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 77 insertions(+)
 create mode 100644 youtube_dl/extractor/clipchamp.py

diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py
new file mode 100644
index 000000000..5a732e808
--- /dev/null
+++ b/youtube_dl/extractor/clipchamp.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    merge_dicts,
+    T,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class ClipchampIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
+        'info_dict': {
+            'id': 'gRXZ4ZhdDaU',
+            'ext': 'mp4',
+            'title': 'Untitled video',
+            'uploader': 'Alexander Schwartz',
+            'timestamp': 1680805580,
+            'upload_date': '20230406',
+            'thumbnail': r're:^https?://.+\.jpg',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+            'format': 'bestvideo',
+        },
+    }]
+
+    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
+    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
+
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        return self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage, 'next.js data', **kw),
+            video_id, **kw)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+
+        storage_location = data.get('storage_location')
+        if storage_location != 'cf_stream':
+            raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,))
+
+        path = data['download_url']
+        iframe = self._download_webpage(
+            'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe')
+        subdomain = self._search_regex(
+            r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe,
+            'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
+
+        formats = self._extract_mpd_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
+            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
+        formats.extend(self._extract_m3u8_formats(
+            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
+            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
+
+        return merge_dicts({
+            'id': video_id,
+            'formats': formats,
+            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None,
+        }, traverse_obj(data, {
+            'title': ('project', 'project_name', T(compat_str)),
+            'timestamp': ('created_at', T(unified_timestamp)),
+            'thumbnail': ('thumbnail_url', T(url_or_none)),
+        }), rev=True)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index be73c0665..42b009ef5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -226,6 +226,7 @@ from .ciscolive import (
     CiscoLiveSearchIE,
 )
 from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE
 from .cliprs import ClipRsIE

From b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 5 May 2023 19:25:42 +0100
Subject: [PATCH 262/312] [InfoExtractor] Add search methods for Next/Nuxt.js
 from yt-dlp * add _search_nextjs_data(), from
 https://github.com/yt-dlp/yt-dlp/pull/1386   thanks selfisekai * add
 _search_nuxt_data(), from https://github.com/yt-dlp/yt-dlp/pull/1921,  
 thanks Lesmiscore, pukkandan * add tests for the above * also fix HTML5 type
 recognition and tests, from  
 https://github.com/yt-dlp/yt-dlp/commit/222a230871fe4fe63f35c49590379c9a77116819,
   thanks Lesmiscore * update extractors in PR using above, fix tests.

---
 test/test_InfoExtractor.py           | 111 +++++++++++++++++++++++++--
 youtube_dl/extractor/clipchamp.py    |   7 --
 youtube_dl/extractor/common.py       |  51 +++++++++++-
 youtube_dl/extractor/globalplayer.py |  32 ++++----
 youtube_dl/extractor/whyp.py         |  25 +-----
 5 files changed, 168 insertions(+), 58 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 6d25441db..34773fbd0 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -7,15 +7,33 @@ import io
 import os
 import sys
 import unittest
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from youtube_dl.compat import compat_etree_fromstring, compat_http_server
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import YoutubeIE, get_info_extractor
-from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
 import threading
 
+from test.helper import (
+    expect_dict,
+    expect_value,
+    FakeYDL,
+    http_server_port,
+)
+from youtube_dl.compat import (
+    compat_etree_fromstring,
+    compat_http_server,
+)
+from youtube_dl.extractor.common import InfoExtractor
+from youtube_dl.extractor import (
+    get_info_extractor,
+    YoutubeIE,
+)
+from youtube_dl.utils import (
+    encode_data_uri,
+    ExtractorError,
+    RegexNotFoundError,
+    strip_jsonp,
+)
+
 
 TEAPOT_RESPONSE_STATUS = 418
 TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
@@ -100,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
 
+    def test_search_nextjs_data(self):
+        html = '''
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="content-type" content=
+  "text/html; charset=utf-8">
+  <meta name="viewport" content="width=device-width">
+  <title>Test _search_nextjs_data()</title>
+</head>
+<body>
+  <div id="__next">
+    <div style="background-color:#17171E" class="FU" dir="ltr">
+      <div class="sc-93de261d-0 dyzzYE">
+        <div>
+          <header class="HD"></header>
+          <main class="MN">
+            <div style="height:0" class="HT0">
+              <div style="width:NaN%" data-testid=
+              "stream-container" class="WDN"></div>
+            </div>
+          </main>
+        </div>
+        <footer class="sc-6e5faf91-0 dEGaHS"></footer>
+      </div>
+    </div>
+  </div>
+  <script id="__NEXT_DATA__" type="application/json">
+  {"props":{"pageProps":{"video":{"id":"testid"}}}}
+  </script>
+</body>
+</html>
+'''
+        search = self.ie._search_nextjs_data(html, 'testID')
+        self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
+
+    def test_search_nuxt_data(self):
+        html = '''
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="content-type" content=
+  "text/html; charset=utf-8">
+  <title>Nuxt.js Test Page</title>
+  <meta name="viewport" content=
+  "width=device-width, initial-scale=1">
+  <meta data-hid="robots" name="robots" content="all">
+</head>
+<body class="BD">
+  <div id="__layout">
+    <h1 class="H1">Example heading</h1>
+    <div class="IN">
+      <p>Decoy text</p>
+    </div>
+  </div>
+  <script>
+  window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
+  </script>
+  <script src="/_nuxt/a12345b.js" defer="defer"></script>
+</body>
+</html>
+'''
+        search = self.ie._search_nuxt_data(html, 'testID')
+        self.assertEqual(search['track']['id'], 'testid')
+
     def test_search_json_ld_realworld(self):
         # https://github.com/ytdl-org/youtube-dl/issues/23306
         expect_dict(
@@ -348,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase):
                 }],
             })
 
+        # from https://0000.studio/
+        # with type attribute but without extension in URL
+        expect_dict(
+            self,
+            self.ie._parse_html5_media_entries(
+                'https://0000.studio',
+                r'''
+                <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+                    controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+                </video>
+                ''', None)[0],
+            {
+                'formats': [{
+                    'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+                    'ext': 'mp4',
+                }],
+            })
+
     def test_extract_jwplayer_data_realworld(self):
         # from http://www.suffolk.edu/sjc/
         expect_dict(
diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py
index 5a732e808..3b485eaab 100644
--- a/youtube_dl/extractor/clipchamp.py
+++ b/youtube_dl/extractor/clipchamp.py
@@ -35,13 +35,6 @@ class ClipchampIE(InfoExtractor):
     _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
     _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
 
-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', **kw),
-            video_id, **kw)
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dbdf456f5..549781186 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import base64
 import datetime
+import functools
 import hashlib
 import json
 import netrc
@@ -23,6 +24,7 @@ from ..compat import (
     compat_getpass,
     compat_integer_types,
     compat_http_client,
+    compat_map as map,
     compat_os_name,
     compat_str,
     compat_urllib_error,
@@ -31,6 +33,7 @@ from ..compat import (
     compat_urllib_request,
     compat_urlparse,
     compat_xml_parse_error,
+    compat_zip as zip,
 )
 from ..downloader.f4m import (
     get_base_url,
@@ -70,6 +73,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    traverse_obj,
     try_get,
     unescapeHTML,
     unified_strdate,
@@ -1349,6 +1353,44 @@ class InfoExtractor(object):
                     break
         return dict((k, v) for k, v in info.items() if v is not None)
 
+    def _search_nextjs_data(self, webpage, video_id, **kw):
+        nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
+        kw.pop('transform_source', None)
+        next_data = self._search_regex(
+            r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
+            webpage, 'next.js data', group='nd', **kw)
+        if not next_data:
+            return {}
+        return self._parse_json(next_data, video_id, **nkw)
+
+    def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
+        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
+
+        # self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)
+        context_name = args[0] if len(args) > 0 else kwargs.get('context_name', '__NUXT__')
+        fatal = kwargs.get('fatal', True)
+        traverse = kwargs.get('traverse', ('data', 0))
+
+        re_ctx = re.escape(context_name)
+
+        FUNCTION_RE = (r'\(\s*function\s*\((?P<arg_keys>[\s\S]*?)\)\s*\{\s*'
+                       r'return\s+(?P<js>\{[\s\S]*?})\s*;?\s*}\s*\((?P<arg_vals>[\s\S]*?)\)')
+
+        js, arg_keys, arg_vals = self._search_regex(
+            (p.format(re_ctx, FUNCTION_RE) for p in
+             (r'<script>\s*window\s*\.\s*{0}\s*=\s*{1}\s*\)\s*;?\s*</script>',
+              r'{0}\s*\([\s\S]*?{1}')),
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}
+
+        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
+            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
+
+        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+        return traverse_obj(ret, traverse) or {}
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
@@ -2496,7 +2538,8 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type, type_info={}):
+        def _media_formats(src, cur_media_type, type_info=None):
+            type_info = type_info or {}
             full_url = absolute_url(src)
             ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
@@ -2514,6 +2557,7 @@ class InfoExtractor(object):
                 formats = [{
                     'url': full_url,
                     'vcodec': 'none' if cur_media_type == 'audio' else None,
+                    'ext': ext,
                 }]
             return is_plain_url, formats
 
@@ -2522,7 +2566,7 @@ class InfoExtractor(object):
         # so we wll include them right here (see
         # https://www.ampproject.org/docs/reference/components/amp-video)
         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
-        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
+        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video(?:-js)?|audio)'
         media_tags = [(media_tag, media_tag_name, media_type, '')
                       for media_tag, media_tag_name, media_type
                       in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
@@ -2540,7 +2584,8 @@ class InfoExtractor(object):
             media_attributes = extract_attributes(media_tag)
             src = strip_or_none(media_attributes.get('src'))
             if src:
-                _, formats = _media_formats(src, media_type)
+                f = parse_content_type(media_attributes.get('type'))
+                _, formats = _media_formats(src, media_type, f)
                 media_info['formats'].extend(formats)
             media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
             if media_content:
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
index cceab9e6a..db490b141 100644
--- a/youtube_dl/extractor/globalplayer.py
+++ b/youtube_dl/extractor/globalplayer.py
@@ -24,13 +24,6 @@ class GlobalPlayerBaseIE(InfoExtractor):
     def _match_valid_url(cls, url):
         return cls.re.match(cls._VALID_URL, url)
 
-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', **kw),
-            video_id, **kw)
-
     def _get_page_props(self, url, video_id):
         webpage = self._download_webpage(url, video_id)
         return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
@@ -39,13 +32,14 @@ class GlobalPlayerBaseIE(InfoExtractor):
         return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
             url, video_id, note='Determining source extension'))
 
-    def _extract_audio(self, episode, series):
+    @staticmethod
+    def _clean_desc(x):
+        x = clean_html(x)
+        if x:
+            x = x.replace('\xa0', ' ')
+        return x
 
-        def clean_desc(x):
-            x = clean_html(x)
-            if x:
-                x = x.replace('\xa0', ' ')
-            return x
+    def _extract_audio(self, episode, series):
 
         return merge_dicts({
             'vcodec': 'none',
@@ -56,7 +50,7 @@ class GlobalPlayerBaseIE(InfoExtractor):
             'uploader': 'itunesAuthor',  # podcasts only
         }), traverse_obj(episode, {
             'id': 'id',
-            'description': ('description', T(clean_desc)),
+            'description': ('description', T(self._clean_desc)),
             'duration': ('duration', T(parse_duration)),
             'thumbnail': 'imageUrl',
             'url': 'streamUrl',
@@ -141,9 +135,9 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
             'ext': 'aac',
             # 'live_status': 'is_live',
             'is_live': True,
-            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
             'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
-            'title': 're:^Classic FM Hall of Fame.+$'
+            'title': 're:Classic FM Hall of Fame.+$'
         },
     }]
 
@@ -160,7 +154,7 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
             'is_live': True,
         }, traverse_obj(station, {
             'title': 'title',
-            'description': 'description',
+            'description': ('description', T(self._clean_desc)),
             'thumbnail': 'image',
         }), rev=True)
 
@@ -177,7 +171,7 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
             'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
             'categories': ['Society & Culture', 'True Crime'],
             'uploader': 'Global',
-            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+            'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
         },
     }, {
         # radio catchup
@@ -203,7 +197,7 @@ class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
                         series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
             'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
         }, traverse_obj(series, {
-            'description': 'description',
+            'description': ('description', T(self._clean_desc)),
             'thumbnail': 'imageUrl',
             'title': 'title',
             'uploader': 'itunesAuthor',  # podcasts only
diff --git a/youtube_dl/extractor/whyp.py b/youtube_dl/extractor/whyp.py
index 16f9154ad..644eb4617 100644
--- a/youtube_dl/extractor/whyp.py
+++ b/youtube_dl/extractor/whyp.py
@@ -21,7 +21,7 @@ class WhypIE(InfoExtractor):
             'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
             'id': '18337',
             'title': 'Home Page Example Track',
-            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'description': r're:(?s).+\bexample track\b',
             'ext': 'mp3',
             'duration': 52.82,
             'uploader': 'Brad',
@@ -33,29 +33,6 @@ class WhypIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', fatal=True, traverse=('data', 0)):
-        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
-
-        import functools
-        import json
-        import re
-        from ..utils import (js_to_json, NO_DEFAULT)
-
-        re_ctx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
-        js, arg_keys, arg_vals = self._search_regex(
-            (p.format(re_ctx, FUNCTION_RE) for p in (r'<script>\s*window\.{0}={1}\s*\)\s*;?\s*</script>', r'{0}\(.*?{1}')),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
-            default=NO_DEFAULT if fatal else (None, None, None))
-        if js is None:
-            return {}
-
-        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
-            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
-
-        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
-        return traverse_obj(ret, traverse) or {}
-
     def _real_extract(self, url):
         unique_id = self._match_id(url)
         webpage = self._download_webpage(url, unique_id)

From a190b559640ce1b5fe67e5a4843dc58328503f3c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 19 Jul 2023 13:01:02 +0100
Subject: [PATCH 263/312] [utils] Fix broken Py 3.11+ compat in
 `traverse_obj()`

* inspect.getargspec is missing despite doc claiming backward compat
* replace with emulation of `Signature.bind()`
---
 youtube_dl/utils.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0cbbec0f3..d52fa7a28 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6109,6 +6109,37 @@ def clean_podcast_url(url):
         )/''', '', url)
 
 
+if __debug__:
+    # Raise TypeError if args can't be bound
+    # needs compat owing to unstable inspect API, thanks PSF :-(
+    try:
+        inspect.signature
+
+        def _try_bind_args(fn, *args, **kwargs):
+            inspect.signature(fn).bind(*args, **kwargs)
+    except AttributeError:
+        # Py < 3.3
+        def _try_bind_args(fn, *args, **kwargs):
+            fn_args = inspect.getargspec(fn)
+            # Py2: ArgInfo(args, varargs, keywords, defaults)
+            # Py3: ArgSpec(args, varargs, keywords, defaults)
+            if not fn_args.keywords:
+                for k in kwargs:
+                    if k not in (fn_args.args or []):
+                        raise TypeError("got an unexpected keyword argument: '{0}'".format(k))
+            if not fn_args.varargs:
+                args_to_bind = len(args)
+                bindable = len(fn_args.args or [])
+                if args_to_bind > bindable:
+                    raise TypeError('too many positional arguments')
+                bindable -= len(fn_args.defaults or [])
+                if args_to_bind < bindable:
+                    if kwargs:
+                        bindable -= len(set(fn_args.args or []) & set(kwargs))
+                    if bindable > args_to_bind:
+                        raise TypeError("missing a required argument: '{0}'".format(fn_args.args[args_to_bind]))
+
+
 def traverse_obj(obj, *paths, **kwargs):
     """
     Safely traverse nested `dict`s and `Iterable`s
@@ -6327,10 +6358,7 @@ def traverse_obj(obj, *paths, **kwargs):
 
             if __debug__ and callable(key):
                 # Verify function signature
-                args = inspect.getargspec(key)
-                if len(args.args) != 2:
-                    # crash differently in 2.6 !
-                    inspect.getcallargs(key, None, None)
+                _try_bind_args(key, None, None)
 
             new_objs = []
             for obj in objs:

From b2ba24bb026904f3503db71f65d2b1627f08edf1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 19 Jul 2023 14:14:50 +0100
Subject: [PATCH 264/312] [InfoExtractor] Add `_match_valid_url()` class method
 and refactor

* API compatible with yt-dlp
* also support Sequence of patterns in _VALID_URL
* one place to compile _VALID_URL
* TODO: remove existing extractor shims
---
 devscripts/make_lazy_extractors.py   | 14 ++++++--
 youtube_dl/extractor/common.py       | 51 +++++++++++++++++++++-------
 youtube_dl/extractor/globalplayer.py |  6 ----
 3 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index a8b6ff1b9..1a841a08b 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -4,6 +4,7 @@ from inspect import getsource
 import io
 import os
 from os.path import dirname as dirn
+import re
 import sys
 
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
@@ -29,11 +30,18 @@ from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 with open('devscripts/lazy_load_template.py', 'rt') as f:
     module_template = f.read()
 
+
+def get_source(m):
+    return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
+
+
 module_contents = [
-    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    module_template,
+    get_source(InfoExtractor.suitable),
+    get_source(InfoExtractor._match_valid_url) + '\n',
     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
     # needed for suitable() methods of Youtube extractor (see #28780)
-    'from youtube_dl.utils import parse_qs\n',
+    'from youtube_dl.utils import parse_qs, variadic\n',
 ]
 
 ie_template = '''
@@ -66,7 +74,7 @@ def build_lazy_ie(ie, name):
         valid_url=valid_url,
         module=ie.__module__)
     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += '\n' + getsource(ie.suitable)
+        s += '\n' + get_source(ie.suitable)
     if hasattr(ie, '_make_valid_url'):
         # search extractors
         s += make_valid_template.format(valid_url=ie._make_valid_url())
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 549781186..7f416d312 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -83,6 +83,7 @@ from ..utils import (
     urljoin,
     url_basename,
     url_or_none,
+    variadic,
     xpath_element,
     xpath_text,
     xpath_with_ns,
@@ -371,9 +372,22 @@ class InfoExtractor(object):
     title, description etc.
 
 
-    Subclasses of this one should re-define the _real_initialize() and
-    _real_extract() methods and define a _VALID_URL regexp.
-    Probably, they should also be added to the list of extractors.
+    A subclass of InfoExtractor must be defined to handle each specific site (or
+    several sites). Such a concrete subclass should be added to the list of
+    extractors. It should also:
+    * define its _VALID_URL attribute as a regexp, or a Sequence of alternative
+      regexps (but see below)
+    * re-define the _real_extract() method
+    * optionally re-define the _real_initialize() method.
+
+    An extractor subclass may also override suitable() if necessary, but the
+    function signature must be preserved and the function must import everything
+    it needs (except other extractors), so that lazy_extractors works correctly.
+    If the subclass's suitable() and _real_extract() functions avoid using
+    _VALID_URL, the subclass need not set that class attribute.
+
+    An abstract subclass of InfoExtractor may be used to simplify implementation
+    within an extractor module; it should not be added to the list of extractors.
 
     _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
@@ -408,22 +422,33 @@ class InfoExtractor(object):
         self._x_forwarded_for_ip = None
         self.set_downloader(downloader)
 
+    @classmethod
+    def __match_valid_url(cls, url):
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for cls, whereas getattr would also
+        # match its superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            # _VALID_URL can now be a list/tuple of patterns
+            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
+        # 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
+        for p in cls._VALID_URL_RE:
+            p = p.match(url)
+            if p:
+                return p
+
+    # The public alias can safely be overridden, as in some back-ports
+    _match_valid_url = __match_valid_url
+
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-
-        # This does not use has/getattr intentionally - we want to know whether
-        # we have cached the regexp for *this* class, whereas getattr would also
-        # match the superclass
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        return cls._VALID_URL_RE.match(url) is not None
+        # This function must import everything it needs (except other extractors),
+        # so that lazy_extractors works correctly
+        return cls.__match_valid_url(url) is not None
 
     @classmethod
     def _match_id(cls, url):
-        if '_VALID_URL_RE' not in cls.__dict__:
-            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
-        m = cls._VALID_URL_RE.match(url)
+        m = cls.__match_valid_url(url)
         assert m
         return compat_str(m.group('id'))
 
diff --git a/youtube_dl/extractor/globalplayer.py b/youtube_dl/extractor/globalplayer.py
index db490b141..ae75dcabf 100644
--- a/youtube_dl/extractor/globalplayer.py
+++ b/youtube_dl/extractor/globalplayer.py
@@ -18,12 +18,6 @@ from ..utils import (
 
 class GlobalPlayerBaseIE(InfoExtractor):
 
-    import re
-
-    @classmethod
-    def _match_valid_url(cls, url):
-        return cls.re.match(cls._VALID_URL, url)
-
     def _get_page_props(self, url, video_id):
         webpage = self._download_webpage(url, video_id)
         return self._search_nextjs_data(webpage, video_id)['props']['pageProps']

From 1fa8b86f0b95f2e1488042ceeda8f356ea2a5448 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 05:29:59 +0100
Subject: [PATCH 265/312] [utils] Remove stray undocumented Host header in
 redirect (fix 46fde7c)

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d52fa7a28..6d798f13a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2996,7 +2996,8 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # Technically the Cookie header should be in unredirected_hdrs;
         # however in practice some may set it in normal headers anyway.
         # We will remove it here to prevent any leaks.
-        remove_headers = ['Cookie']
+        # Also remove unwanted and undocumented Host header for old URL
+        remove_headers = ['Cookie', 'Host']
 
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4

From 74eef6bb5e6b88d042aa13caec667aa3df84ba73 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 12:42:46 +0100
Subject: [PATCH 266/312] [workflows/ci.yml] Extend Python versions * add 3.10
 - 3.12 * use https://pypi.org/project/pynose/ for Py >= 3.9 * test Windows
 with 3.4 * set defaults (main, both) except push: (all, core)

---
 .github/workflows/ci.yml | 48 +++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c3aabde47..10951d322 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,22 +1,34 @@
 name: CI
 
 env:
-  # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099)
-  # or switching to fork of https://github.com/mdmintz/pynose
-  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
-  main-cpython-versions: 2.7, 3.2, 3.5, 3.9
+  all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12
+  main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
   cpython-versions: main
-  test-set: both
+  test-set: core
 
 on:
   push:
+    inputs:
+      cpython-versions:
+        type: string
+        default: all
+      test-set:
+        type: string
+        default: core
   pull_request:
+    inputs:
+      cpython-versions:
+        type: string
+        default: main
+      test-set:
+        type: string
+        default: both
   workflow_dispatch:
     inputs:
       cpython-versions:
         type: choice
-        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9)
+        description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11)
         options:
           - all
           - main
@@ -30,7 +42,7 @@ on:
           - core
           - download
         required: true
-        default: core
+        default: both
 
 permissions:
   contents: read
@@ -44,7 +56,8 @@ jobs:
       test-set: ${{ steps.run.outputs.test-set }}
       own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
     steps:
-    - id: run
+    - name: Make version array
+      id: run
       run: |
         # Make a JSON Array from comma/space-separated string (no extra escaping)
         json_list() { \
@@ -66,7 +79,6 @@ jobs:
         # versions with a special get-pip.py in a per-version subdirectory
         printf 'own-pip-versions=%s\n' \
           "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
-
   tests:
     name: Run tests
     needs: select
@@ -82,19 +94,18 @@ jobs:
       fail-fast: true
       matrix:
         os: [ubuntu-20.04]
-        # outside steps, use github.env...., not env....
         python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
         python-impl: [cpython]
         ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
         run-tests-ext: [sh]
         include:
         - os: windows-2019
-          python-version: 3.2
+          python-version: 3.4
           python-impl: cpython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: bat
         - os: windows-2019
-          python-version: 3.2
+          python-version: 3.4
           python-impl: cpython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: bat
@@ -205,17 +216,14 @@ jobs:
           make install )
         rm -rf $openssl_name
         rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
-
         # Download PyEnv from its GitHub repository.
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
-
         # Prevent pyenv build trying (and failing) to update pip
         export GET_PIP=get-pip-2.6.py
         echo 'import sys; sys.exit(0)' > ${GET_PIP}
         GET_PIP=$(realpath $GET_PIP)
-
         # Build and install Python
         export CFLAGS="-I$openssl_inc"
         export LDFLAGS="-L$openssl_lib"
@@ -322,7 +330,12 @@ jobs:
       run: |
         echo "$PATH"
         echo "$PYTHONHOME"
-        $PIP -qq show nose || $PIP install nose
+        # Use PyNose for recent Pythons instead of Nose
+        py3ver="${{ matrix.python-version }}"
+        py3ver=${py3ver#3.}
+        [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0
+        [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
+        $PIP -qq show $nose || $PIP install $nose
     - name: Install nose for other Python 2
       if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
       shell: bash
@@ -354,7 +367,7 @@ jobs:
           '    def setUp(self):' \
           '        self.ver = os.environ["PYTHON_VER"].split("-")' \
           '    def test_python_ver(self):' \
-          '        self.assertEqual(sys.version[:3], self.ver[-1])' \
+          '        self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
           '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
           '        self.assertIn(self.ver[0], sys.version.lower())' \
           '    def test_python_impl(self):' \
@@ -370,7 +383,6 @@ jobs:
         PYTHON_IMPL: ${{ matrix.python-impl }}
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
-
   flake8:
     name: Linter
     runs-on: ubuntu-latest

From 2a4e9faa773cce60e82453cb32f13e48513c4a46 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 15:49:46 +0100
Subject: [PATCH 267/312] [doc] Update developer guidance * mention pynose *
 mention traverse_obj and add/revise examples

[skip ci]
---
 README.md | 113 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 101 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 14a3d6c86..47e686f84 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex
 You can also use pip:
 
     sudo -H pip install --upgrade youtube-dl
-    
+
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
 macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
@@ -563,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f
  - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
  - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
  - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- - `format` (string): A human-readable description of the format 
+ - `format` (string): A human-readable description of the format
  - `format_id` (string): Format code specified by `--format`
  - `format_note` (string): Additional info about the format
  - `width` (numeric): Width of the video
@@ -675,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
 
 **tl;dr:** [navigate me to examples](#format-selection-examples).
 
-The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. 
+The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
 
 You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
 
@@ -760,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
 
  - Absolute dates: Dates in the format `YYYYMMDD`.
  - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
- 
+
 Examples:
 
 ```bash
@@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests
 
+For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later.
+
 See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
 
 If you want to create a build of youtube-dl yourself, you'll need
@@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions!
 
 ## youtube-dl coding conventions
 
-This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
 Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
 
@@ -1114,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP
 ```python
 meta = self._download_json(url, video_id)
 ```
-    
+
 Assume at this point `meta`'s layout is:
 
 ```python
@@ -1158,7 +1160,7 @@ description = self._search_regex(
 ```
 
 On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
- 
+
 ### Provide fallbacks
 
 When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
@@ -1206,7 +1208,7 @@ r'(id|ID)=(?P<id>\d+)'
 #### Make regular expressions relaxed and flexible
 
 When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
- 
+
 ##### Example
 
 Say you need to extract `title` from the following HTML code:
@@ -1230,7 +1232,7 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
-Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: 
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
 
 The code definitely should not look like:
 
@@ -1331,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`]
 
 Use `url_or_none` for safe URL processing.
 
-Use `try_get` for safe metadata extraction from parsed JSON.
+Use `traverse_obj` for safe metadata extraction from parsed JSON.
 
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. 
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
 
 Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
 
 #### More examples
 
 ##### Safely extract optional description from parsed JSON
+
+When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works: also review usage in the codebase for more examples.
+
+In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available.
+
+```python
+description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str)))
+```
+`T(...)` is a shorthand for a set literal; if you hate people who still run Python 2.6, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`.
+
+Some extractors use the older and less capable `try_get()` function in the same way.
+
 ```python
 description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
 ```
 
 ##### Safely extract more optional metadata
+
+In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid.
+
 ```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {}
+# formerly:
+# video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
 description = video.get('summary')
 duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```
 
+#### Safely extract nested lists
+
+Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`:
+```json
+{
+    "title": "Example video",
+    "comment": "try extracting this",
+    "media": [{
+        "type": "bad",
+        "size": 320,
+        "url": "https://some.cdn.site/bad.mp4"
+    }, {
+        "type": "streaming",
+        "url": "https://some.cdn.site/hls.m3u8"
+    }, {
+        "type": "super",
+        "size": 1280,
+        "url": "https://some.cdn.site/good.webm"
+    }],
+    "moreStuff": "more values",
+    ...
+}
+```
+
+Then extractor code like this can collect the various fields of the JSON:
+```python
+...
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
+...
+        ...
+        info_dict = {}
+        # extract title and description if valid and not empty
+        info_dict.update(traverse_obj(media_json, {
+            'title': ('title', T(txt_or_none)),
+            'description': ('comment', T(txt_or_none)),
+        }))
+
+        # extract any recognisable media formats
+        fmts = []
+        # traverse into "media" list, extract `dict`s with desired keys
+        for fmt in traverse_obj(media_json, ('media', Ellipsis, {
+                'format_id': ('type', T(txt_or_none)),
+                'url': ('url', T(url_or_none)),
+                'width': ('size', T(int_or_none)), })):
+            # bad `fmt` values were `None` and removed
+            if 'url' not in fmt:
+                continue
+            fmt_url = fmt['url']  # known to be valid URL
+            ext = determine_ext(fmt_url)
+            if ext == 'm3u8':
+                fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False))
+            else:
+                fmt['ext'] = ext
+                fmts.append(fmt)
+
+        # sort, raise if no formats
+        self._sort_formats(fmts)
+
+        info_dict['formats'] = fmts
+        ...
+```
+The extractor raises an exception rather than random crashes if the JSON structure changes so that no formats are found.
+
 # EMBEDDING YOUTUBE-DL
 
 youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).

From ca71e56c481c6d5ce69b4756f8f8c0aff97d79b5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 16:36:54 +0100
Subject: [PATCH 268/312] [workflows/ci.yml] Build 3.12 with pyenv

---
 .github/workflows/ci.yml | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 10951d322..a1e21fd4a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -124,7 +124,7 @@ jobs:
     #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
       id: setup-python
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
       # wrap broken actions/setup-python@v4
       uses: ytdl-org/setup-python@v1
       with:
@@ -162,6 +162,42 @@ jobs:
             'import sys' \
             'print(sys.path)' \
             | ${expected} -
+    #-------- Python 3.12 -
+    - name: Set up Python 3.12 environment
+      if: ${{ matrix.python-version == '3.12' }}
+      shell: bash
+      run: |
+        PYENV_ROOT=$HOME/.local/share/pyenv
+        echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+    - name: Cache Python 3.12
+      id: cache312
+      if: ${{ matrix.python-version == '3.12' }}
+      uses: actions/cache@v3
+      with:
+        key: python-3.12
+        path: |
+          ${{ env.PYENV_ROOT }}
+    - name: Build and set up Python 3.12
+      if: ${{ matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+      # dl and build locally
+      shell: bash
+      run: |
+        # Install build environment
+        sudo apt-get install -y build-essential llvm libssl-dev tk-dev  \
+                      libncursesw5-dev libreadline-dev libsqlite3-dev   \
+                      libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+        # Download PyEnv from its GitHub repository.
+        export PYENV_ROOT=${{ env.PYENV_ROOT }}
+        export PATH=$PYENV_ROOT/bin:$PATH
+        git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
+        pyenv install 3.12.0b4
+    - name: Locate Python 3.12
+      if: ${{ matrix.python-version == '3.12' }}
+      shell: bash
+      run: |
+        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
+        echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+        echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
     - name: Set up Python 2.7
       if: ${{ matrix.python-version == '2.7' }}
@@ -325,7 +361,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' || matrix.python-version == '3.12' }}
       shell: bash
       run: |
         echo "$PATH"

From 7bce2ad441b874e7a1cf8cc81059c5601d832697 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 18:49:48 +0100
Subject: [PATCH 269/312] [build] Fix various Jython CI and test issues

---
 .github/workflows/ci.yml           | 38 +++++++++++++++++++-----------
 devscripts/make_lazy_extractors.py | 11 +++++++++
 test/test_http.py                  | 13 ++++++----
 youtube_dl/compat.py               |  2 +-
 4 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a1e21fd4a..6b91edd6c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -111,10 +111,12 @@ jobs:
           run-tests-ext: bat
         # jython
         - os: ubuntu-20.04
+          python-version: 2.7
           python-impl: jython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
           run-tests-ext: sh
         - os: ubuntu-20.04
+          python-version: 2.7
           python-impl: jython
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
@@ -163,22 +165,22 @@ jobs:
             'print(sys.path)' \
             | ${expected} -
     #-------- Python 3.12 -
-    - name: Set up Python 3.12 environment
-      if: ${{ matrix.python-version == '3.12' }}
+    - name: Set up CPython 3.12 environment
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       shell: bash
       run: |
         PYENV_ROOT=$HOME/.local/share/pyenv
         echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
     - name: Cache Python 3.12
       id: cache312
-      if: ${{ matrix.python-version == '3.12' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       uses: actions/cache@v3
       with:
         key: python-3.12
         path: |
           ${{ env.PYENV_ROOT }}
     - name: Build and set up Python 3.12
-      if: ${{ matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -192,7 +194,7 @@ jobs:
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
         pyenv install 3.12.0b4
     - name: Locate Python 3.12
-      if: ${{ matrix.python-version == '3.12' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
       shell: bash
       run: |
         PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
@@ -200,7 +202,7 @@ jobs:
         echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
     - name: Set up Python 2.7
-      if: ${{ matrix.python-version == '2.7' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }}
       # install 2.7
       shell: bash
       run: |
@@ -208,7 +210,7 @@ jobs:
         echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
     #-------- Python 2.6 --
     - name: Set up Python 2.6 environment
-      if: ${{ matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
       shell: bash
       run: |
         openssl_name=openssl-1.0.2u
@@ -228,7 +230,7 @@ jobs:
           ${{ env.openssl_dir }}
           ${{ env.PYENV_ROOT }}
     - name: Build and set up Python 2.6
-      if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -266,7 +268,7 @@ jobs:
         export LD_LIBRARY_PATH="$openssl_lib"
         pyenv install 2.6.9
     - name: Locate Python 2.6
-      if: ${{ matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
       shell: bash
       run: |
         PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
@@ -288,7 +290,7 @@ jobs:
         echo "PIP=pip" >> "$GITHUB_ENV"
     - name: Cache Jython
       id: cachejy
-      if: ${{ matrix.python-impl == 'jython' }}
+      if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }}
       uses: actions/cache@v3
       with:
         # 2.7.3 now available, may solve SNI issue
@@ -296,7 +298,7 @@ jobs:
         path: |
           ${{ env.JYTHON_ROOT }}
     - name: Install Jython
-      if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }}
+      if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }}
       shell: bash
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
@@ -309,6 +311,11 @@ jobs:
       run: |
         JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
         echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+    - name: Install supporting Python 2.7 if possible
+      if: ${{ steps.cachejy.outputs.cache-hit }}
+      shell: bash
+      run: |
+        sudo apt-get install -y python2.7 || true
     #-------- pip ---------
     - name: Set up supported Python ${{ matrix.python-version }} pip
       if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
@@ -391,6 +398,11 @@ jobs:
       if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
       shell: bash
       run: |
+        # set PYTHON_VER
+        PYTHON_VER=${{ matrix.python-version }}
+        [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}"
+        echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV"
+        echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV"
         # define a test to validate the Python version used by nosetests
         printf '%s\n' \
           'from __future__ import unicode_literals' \
@@ -405,7 +417,7 @@ jobs:
           '    def test_python_ver(self):' \
           '        self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
           '        self.assertTrue(sys.version.startswith(self.ver[-1]))' \
-          '        self.assertIn(self.ver[0], sys.version.lower())' \
+          '        self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \
           '    def test_python_impl(self):' \
           '        self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
           > test/test_python.py
@@ -415,8 +427,6 @@ jobs:
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       env:
         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
-        PYTHON_VER: ${{ matrix.python-version }}
-        PYTHON_IMPL: ${{ matrix.python-impl }}
       run: |
         ./devscripts/run_tests.${{ matrix.run-tests-ext }}
   flake8:
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 1a841a08b..dee9d6d91 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -118,3 +118,14 @@ module_src = '\n'.join(module_contents) + '\n'
 
 with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
     f.write(module_src)
+
+# work around JVM byte code module limit in Jython
+if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
+    import subprocess
+    from youtube_dl.compat import compat_subprocess_get_DEVNULL
+    # if Python 2.7 is available, use it to compile the module for Jython
+    try:
+        # if Python 2.7 is available, use it to compile the module for Jython
+        subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL())
+    except Exception:
+        pass
diff --git a/test/test_http.py b/test/test_http.py
index 1a6b2e878..4ec8e13e3 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -45,6 +45,7 @@ from youtube_dl.utils import (
 )
 
 from test.helper import (
+    expectedFailureIf,
     FakeYDL,
     FakeLogger,
     http_server_port,
@@ -243,6 +244,11 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 
 
 class TestHTTP(unittest.TestCase):
+    # when does it make sense to check the SSL certificate?
+    _check_cert = (
+        sys.version_info >= (3, 2)
+        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))
+
     def setUp(self):
         # HTTP server
         self.http_httpd = compat_http_server.HTTPServer(
@@ -307,10 +313,7 @@ class TestHTTP(unittest.TestCase):
             else self.https_port if scheme == 'https'
             else self.http_port, path)
 
-    @unittest.skipUnless(
-        sys.version_info >= (3, 2)
-        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 9)),
-        'No support for certificate check in SSL')
+    @unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')
     def test_nocheckcertificate(self):
         with FakeYDL({'logger': FakeLogger()}) as ydl:
             with self.assertRaises(compat_urllib_error.URLError):
@@ -376,6 +379,8 @@ class TestHTTP(unittest.TestCase):
                 with self.assertRaises(compat_urllib_HTTPError):
                     do_req(code, 'GET')
 
+    # Jython 2.7.1 times out for some reason
+    @expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))
     def test_content_type(self):
         # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
         with FakeYDL({'nocheckcertificate': True}) as ydl:
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 1d784d90f..da6d70ec4 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -131,7 +131,7 @@ if sys.version_info[0] == 2 or sys.version_info < (3, 3):
         def load(self, rawdata):
             must_have_value = 0
             if not isinstance(rawdata, dict):
-                if sys.version_info[:2] != (2, 7):
+                if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
                     # attribute must have value for parsing
                     rawdata, must_have_value = re.subn(
                         r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)

From 44faa71b19c866b836e4433ddd3e4722ac6d282f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 20 Jul 2023 19:32:29 +0100
Subject: [PATCH 270/312] [test/test_execution.py] Use
 `compat_subprocess_get_DEVNULL()`

---
 test/test_execution.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/test_execution.py b/test/test_execution.py
index ae59e562a..56e1b679d 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -10,17 +10,14 @@ import os
 import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.compat import compat_register_utf8
+from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
 from youtube_dl.utils import encodeArgument
 
 compat_register_utf8()
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-try:
-    _DEV_NULL = subprocess.DEVNULL
-except AttributeError:
-    _DEV_NULL = open(os.devnull, 'wb')
+_DEV_NULL = compat_subprocess_get_DEVNULL()
 
 
 class TestExecution(unittest.TestCase):

From 2b7dd3b2a2d7c6e228a42d1000a6f3296739ff1c Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 24 Jul 2023 03:30:28 +0100
Subject: [PATCH 271/312] [utils] Fix update_Request() with empty data (not
 None)

---
 test/test_http.py   | 13 +++++++++++++
 youtube_dl/utils.py |  7 +++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 4ec8e13e3..89580969d 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -41,6 +41,7 @@ from youtube_dl.compat import (
 
 from youtube_dl.utils import (
     sanitized_Request,
+    update_Request,
     urlencode_postdata,
 )
 
@@ -395,6 +396,18 @@ class TestHTTP(unittest.TestCase):
             headers = ydl.urlopen(r).read().decode('utf-8')
             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
 
+    def test_update_req(self):
+        req = sanitized_Request('http://example.com')
+        assert req.data is None
+        assert req.get_method() == 'GET'
+        assert not req.has_header('Content-Type')
+        # Test that zero-byte payloads will be sent
+        req = update_Request(req, data=b'')
+        assert req.data == b''
+        assert req.get_method() == 'POST'
+        # yt-dl expects data to be encoded and Content-Type to be added by sender
+        # assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'
+
     def test_cookiejar(self):
         with FakeYDL() as ydl:
             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 6d798f13a..b5475434f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2996,8 +2996,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # Technically the Cookie header should be in unredirected_hdrs;
         # however in practice some may set it in normal headers anyway.
         # We will remove it here to prevent any leaks.
-        # Also remove unwanted and undocumented Host header for old URL
-        remove_headers = ['Cookie', 'Host']
+        remove_headers = ['Cookie']
 
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
@@ -3016,7 +3015,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
             remove_headers.extend(['Content-Length', 'Content-Type'])
 
         # NB: don't use dict comprehension for python 2.6 compatibility
-        new_headers = dict((k, v) for k, v in req.header_items()
+        new_headers = dict((k, v) for k, v in req.headers.items()
                            if k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
@@ -4187,7 +4186,7 @@ def update_url_query(url, query):
 def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers = req.headers.copy()
     req_headers.update(headers)
-    req_data = data or req.data
+    req_data = data if data is not None else req.data
     req_url = update_url_query(url or req.get_full_url(), query)
     req_get_method = req.get_method()
     if req_get_method == 'HEAD':

From aac33155e40af3da96a2467dd05faea201815989 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 24 Jul 2023 23:43:36 +0100
Subject: [PATCH 272/312] [build] Add and use `devscripts/utils`

---
 devscripts/__init__.py             |  1 +
 devscripts/make_lazy_extractors.py | 22 +++++------
 devscripts/utils.py                | 62 ++++++++++++++++++++++++++++++
 test/test_execution.py             | 10 +++--
 4 files changed, 80 insertions(+), 15 deletions(-)
 create mode 100644 devscripts/__init__.py
 create mode 100644 devscripts/utils.py

diff --git a/devscripts/__init__.py b/devscripts/__init__.py
new file mode 100644
index 000000000..750dbdca7
--- /dev/null
+++ b/devscripts/__init__.py
@@ -0,0 +1 @@
+# Empty file needed to make devscripts.utils properly importable from outside
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index dee9d6d91..5b8b123a4 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals, print_function
 
 from inspect import getsource
-import io
 import os
 from os.path import dirname as dirn
 import re
@@ -9,17 +8,20 @@ import sys
 
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 lazy_extractors_filename = sys.argv[1]
 if os.path.exists(lazy_extractors_filename):
     os.remove(lazy_extractors_filename)
 # Py2: may be confused by leftover lazy_extractors.pyc
-try:
-    os.remove(lazy_extractors_filename + 'c')
-except OSError:
-    pass
+if sys.version_info[0] < 3:
+    for c in ('c', 'o'):
+        try:
+            os.remove(lazy_extractors_filename + 'c')
+        except OSError:
+            pass
 
+from devscripts.utils import read_file, write_file
 from youtube_dl.compat import compat_register_utf8
 
 compat_register_utf8()
@@ -27,8 +29,7 @@ compat_register_utf8()
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 
-with open('devscripts/lazy_load_template.py', 'rt') as f:
-    module_template = f.read()
+module_template = read_file('devscripts/lazy_load_template.py')
 
 
 def get_source(m):
@@ -114,10 +115,9 @@ for ie in ordered_cls:
 module_contents.append(
     '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
 
-module_src = '\n'.join(module_contents) + '\n'
+module_src = '\n'.join(module_contents)
 
-with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
-    f.write(module_src)
+write_file(lazy_extractors_filename, module_src + '\n')
 
 # work around JVM byte code module limit in Jython
 if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
diff --git a/devscripts/utils.py b/devscripts/utils.py
new file mode 100644
index 000000000..2d072d2e0
--- /dev/null
+++ b/devscripts/utils.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import argparse
+import functools
+import os.path
+import subprocess
+import sys
+
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from youtube_dl.compat import (
+    compat_kwargs,
+    compat_open as open,
+)
+
+
+def read_file(fname):
+    with open(fname, encoding='utf-8') as f:
+        return f.read()
+
+
+def write_file(fname, content, mode='w'):
+    with open(fname, mode, encoding='utf-8') as f:
+        return f.write(content)
+
+
+def read_version(fname='youtube_dl/version.py'):
+    """Get the version without importing the package"""
+    exec(compile(read_file(fname), fname, 'exec'))
+    return locals()['__version__']
+
+
+def get_filename_args(has_infile=False, default_outfile=None):
+    parser = argparse.ArgumentParser()
+    if has_infile:
+        parser.add_argument('infile', help='Input file')
+    kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
+    kwargs['help'] = 'Output file'
+    parser.add_argument('outfile', **compat_kwargs(kwargs))
+
+    opts = parser.parse_args()
+    if has_infile:
+        return opts.infile, opts.outfile
+    return opts.outfile
+
+
+def compose_functions(*functions):
+    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
+
+
+def run_process(*args, **kwargs):
+    kwargs.setdefault('text', True)
+    kwargs.setdefault('check', True)
+    kwargs.setdefault('capture_output', True)
+    if kwargs['text']:
+        kwargs.setdefault('encoding', 'utf-8')
+        kwargs.setdefault('errors', 'replace')
+        kwargs = compat_kwargs(kwargs)
+    return subprocess.run(args, **kwargs)
diff --git a/test/test_execution.py b/test/test_execution.py
index 56e1b679d..9daaafa6c 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -8,14 +8,16 @@ import unittest
 import sys
 import os
 import subprocess
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+sys.path.insert(0, rootDir)
 
 from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
 from youtube_dl.utils import encodeArgument
 
 compat_register_utf8()
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 _DEV_NULL = compat_subprocess_get_DEVNULL()
 
@@ -49,10 +51,10 @@ class TestExecution(unittest.TestCase):
             subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
             subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
         finally:
-            for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
+            for x in ('', 'c') if sys.version_info[0] < 3 else ('',):
                 try:
                     os.remove(lazy_extractors + x)
-                except (IOError, OSError):
+                except OSError:
                     pass
 
 

From a25e9f3c84a34d43f78a4e5a6f6c2e98e2a0ade3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 00:17:15 +0100
Subject: [PATCH 273/312] [compat] Use `compat_open()`

---
 devscripts/make_readme.py                  |  4 +++-
 test/helper.py                             |  6 +++---
 test/test_InfoExtractor.py                 | 18 +++++++++---------
 test/test_YoutubeDL.py                     |  7 ++++---
 test/test_download.py                      |  6 +++---
 test/test_swfinterp.py                     | 10 ++++++----
 test/test_unicode_literals.py              | 12 +++++++-----
 test/test_write_annotations.py             |  5 ++---
 test/test_youtube_signature.py             |  9 ++++++---
 youtube_dl/YoutubeDL.py                    | 19 +++++++------------
 youtube_dl/cache.py                        |  8 +++++---
 youtube_dl/extractor/common.py             |  1 +
 youtube_dl/extractor/openload.py           |  1 +
 youtube_dl/postprocessor/embedthumbnail.py |  2 ++
 youtube_dl/postprocessor/ffmpeg.py         |  8 ++++----
 youtube_dl/update.py                       |  7 +++++--
 16 files changed, 68 insertions(+), 55 deletions(-)

diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 8fbce0796..c5d5dd4f1 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -4,6 +4,8 @@ import io
 import sys
 import re
 
+from youtube_dl.compat import compat_open as open
+
 README_FILE = 'README.md'
 helptext = sys.stdin.read()
 
@@ -20,7 +22,7 @@ options = helptext[helptext.index('  General Options:') + 19:]
 options = re.sub(r'(?m)^  (\w.+)$', r'## \1', options)
 options = '# OPTIONS\n' + options + '\n'
 
-with io.open(README_FILE, 'w', encoding='utf-8') as f:
+with open(README_FILE, 'w', encoding='utf-8') as f:
     f.write(header)
     f.write(options)
     f.write(footer)
diff --git a/test/helper.py b/test/helper.py
index aa99001b2..fc55c6b46 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import errno
-import io
 import hashlib
 import json
 import os.path
@@ -14,6 +13,7 @@ import unittest
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import (
+    compat_open as open,
     compat_os_name,
     compat_str,
 )
@@ -29,10 +29,10 @@ def get_params(override=None):
                                    "parameters.json")
     LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                          "local_parameters.json")
-    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
         parameters = json.load(pf)
     if os.path.exists(LOCAL_PARAMETERS_FILE):
-        with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
             parameters.update(json.load(pf))
     if override:
         parameters.update(override)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 34773fbd0..3f96645de 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -3,7 +3,6 @@
 from __future__ import unicode_literals
 
 # Allow direct execution
-import io
 import os
 import sys
 import unittest
@@ -21,6 +20,7 @@ from test.helper import (
 from youtube_dl.compat import (
     compat_etree_fromstring,
     compat_http_server,
+    compat_open as open,
 )
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import (
@@ -902,8 +902,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_m3u8_formats(
                     f.read(), m3u8_url, ext='mp4')
                 self.ie._sort_formats(formats)
@@ -1127,8 +1127,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_mpd_formats(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
@@ -1154,8 +1154,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for f4m_file, f4m_url, expected_formats in _TEST_CASES:
-            with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/f4m/%s.f4m' % f4m_file,
+                      mode='r', encoding='utf-8') as f:
                 formats = self.ie._parse_f4m_formats(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     f4m_url, None)
@@ -1202,8 +1202,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         ]
 
         for xspf_file, xspf_url, expected_entries in _TEST_CASES:
-            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
-                         mode='r', encoding='utf-8') as f:
+            with open('./test/testdata/xspf/%s.xspf' % xspf_file,
+                      mode='r', encoding='utf-8') as f:
                 entries = self.ie._parse_xspf(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 6cf555827..d994682b2 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -22,6 +22,7 @@ from youtube_dl.compat import (
     compat_http_cookiejar_Cookie,
     compat_http_cookies_SimpleCookie,
     compat_kwargs,
+    compat_open as open,
     compat_str,
     compat_urllib_error,
 )
@@ -701,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase):
 
         class SimplePP(PostProcessor):
             def run(self, info):
-                with open(audiofile, 'wt') as f:
+                with open(audiofile, 'w') as f:
                     f.write('EXAMPLE')
                 return [info['filepath']], info
 
         def run_pp(params, PP):
-            with open(filename, 'wt') as f:
+            with open(filename, 'w') as f:
                 f.write('EXAMPLE')
             ydl = YoutubeDL(params)
             ydl.add_post_processor(PP())
@@ -725,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase):
 
         class ModifierPP(PostProcessor):
             def run(self, info):
-                with open(info['filepath'], 'wt') as f:
+                with open(info['filepath'], 'w') as f:
                     f.write('MODIFIED')
                 return [], info
 
diff --git a/test/test_download.py b/test/test_download.py
index d50008307..e0bc8cb95 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -20,15 +20,15 @@ from test.helper import (
 
 
 import hashlib
-import io
 import json
 import socket
 
 import youtube_dl.YoutubeDL
 from youtube_dl.compat import (
     compat_http_client,
-    compat_urllib_error,
     compat_HTTPError,
+    compat_open as open,
+    compat_urllib_error,
 )
 from youtube_dl.utils import (
     DownloadError,
@@ -245,7 +245,7 @@ def generator(test_case, tname):
                 self.assertTrue(
                     os.path.exists(info_json_fn),
                     'Missing info file %s' % info_json_fn)
-                with io.open(info_json_fn, encoding='utf-8') as infof:
+                with open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 expect_info_dict(self, info_dict, tc.get('info_dict', {}))
         finally:
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index 9f18055e6..7c282ee00 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -5,16 +5,18 @@ from __future__ import unicode_literals
 import os
 import sys
 import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 import errno
-import io
 import json
 import re
 import subprocess
 
 from youtube_dl.swfinterp import SWFInterpreter
+from youtube_dl.compat import compat_open as open
 
 
 TEST_DIR = os.path.join(
@@ -43,7 +45,7 @@ def _make_testfunc(testfile):
                     '-static-link-runtime-shared-libraries', as_file])
             except OSError as ose:
                 if ose.errno == errno.ENOENT:
-                    print('mxmlc not found! Skipping test.')
+                    self.skipTest('mxmlc not found!')
                     return
                 raise
 
@@ -51,7 +53,7 @@ def _make_testfunc(testfile):
             swf_content = swf_f.read()
         swfi = SWFInterpreter(swf_content)
 
-        with io.open(as_file, 'r', encoding='utf-8') as as_f:
+        with open(as_file, 'r', encoding='utf-8') as as_f:
             as_content = as_f.read()
 
         def _find_spec(key):
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index c7c2252f5..0c83f2a0c 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -2,14 +2,15 @@ from __future__ import unicode_literals
 
 # Allow direct execution
 import os
+import re
 import sys
 import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import io
-import re
+dirn = os.path.dirname
 
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+rootDir = dirn(dirn(os.path.abspath(__file__)))
+
+sys.path.insert(0, rootDir)
 
 IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
@@ -24,6 +25,7 @@ IGNORED_DIRS = [
 ]
 
 from test.helper import assertRegexpMatches
+from youtube_dl.compat import compat_open as open
 
 
 class TestUnicodeLiterals(unittest.TestCase):
@@ -41,7 +43,7 @@ class TestUnicodeLiterals(unittest.TestCase):
                     continue
 
                 fn = os.path.join(dirpath, basename)
-                with io.open(fn, encoding='utf-8') as inf:
+                with open(fn, encoding='utf-8') as inf:
                     code = inf.read()
 
                 if "'" not in code and '"' not in code:
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
index 41abdfe3b..68e0a391d 100644
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -11,12 +11,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_params, try_rm
 
 
-import io
-
 import xml.etree.ElementTree
 
 import youtube_dl.YoutubeDL
 import youtube_dl.extractor
+from youtube_dl.compat import compat_open as open
 
 
 class YoutubeDL(youtube_dl.YoutubeDL):
@@ -51,7 +50,7 @@ class TestAnnotations(unittest.TestCase):
         ydl.download([TEST_ID])
         self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
         annoxml = None
-        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+        with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
             annoxml = xml.etree.ElementTree.parse(annof)
         self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
         root = annoxml.getroot()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 5dcabaf95..f45dfec7c 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -8,11 +8,14 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import io
 import re
 import string
 
-from youtube_dl.compat import compat_str, compat_urlretrieve
+from youtube_dl.compat import (
+    compat_open as open,
+    compat_str,
+    compat_urlretrieve,
+)
 
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
@@ -208,7 +211,7 @@ def t_factory(name, sig_func, url_pattern):
 
             if not os.path.exists(fn):
                 compat_urlretrieve(url, fn)
-            with io.open(fn, encoding='utf-8') as testf:
+            with open(fn, encoding='utf-8') as testf:
                 jscode = testf.read()
             self.assertEqual(sig_func(jscode, sig_input), expected_sig)
 
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 98d080f43..6a12f91e4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,11 +4,9 @@
 from __future__ import absolute_import, unicode_literals
 
 import collections
-import contextlib
 import copy
 import datetime
 import errno
-import fileinput
 import io
 import itertools
 import json
@@ -45,6 +43,7 @@ from .compat import (
     compat_kwargs,
     compat_map as map,
     compat_numeric_types,
+    compat_open as open,
     compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
@@ -1977,7 +1976,7 @@ class YoutubeDL(object):
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
-                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                    with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
@@ -1992,7 +1991,7 @@ class YoutubeDL(object):
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
@@ -2019,7 +2018,7 @@ class YoutubeDL(object):
                         try:
                             # Use newline='' to prevent conversion of newline characters
                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
-                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                            with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                 subfile.write(sub_info['data'])
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -2028,7 +2027,7 @@ class YoutubeDL(object):
                         try:
                             sub_data = ie._request_webpage(
                                 sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                            with open(encodeFilename(sub_filename), 'wb') as subfile:
                                 subfile.write(sub_data)
                         except (ExtractorError, IOError, OSError, ValueError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
@@ -2232,12 +2231,8 @@ class YoutubeDL(object):
         return self._download_retcode
 
     def download_with_info_file(self, info_filename):
-        with contextlib.closing(fileinput.FileInput(
-                [info_filename], mode='r',
-                openhook=fileinput.hook_encoded('utf-8'))) as f:
-            # FileInput doesn't have a read method, we can't call json.load
-            # TODO: let's use io.open(), then
-            info = self.filter_requested_info(json.loads('\n'.join(f)))
+        with open(info_filename, encoding='utf-8') as f:
+            info = self.filter_requested_info(json.load(f))
         try:
             self.process_ie_result(info, download=True)
         except DownloadError:
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 4822439d0..54123da0e 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -1,14 +1,16 @@
 from __future__ import unicode_literals
 
 import errno
-import io
 import json
 import os
 import re
 import shutil
 import traceback
 
-from .compat import compat_getenv
+from .compat import (
+    compat_getenv,
+    compat_open as open,
+)
 from .utils import (
     error_to_compat_str,
     expand_path,
@@ -83,7 +85,7 @@ class Cache(object):
         cache_fn = self._get_cache_fn(section, key, dtype)
         try:
             try:
-                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                with open(cache_fn, 'r', encoding='utf-8') as cachef:
                     return self._validate(json.load(cachef), min_ver)
             except ValueError:
                 try:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7f416d312..0eca9f844 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -25,6 +25,7 @@ from ..compat import (
     compat_integer_types,
     compat_http_client,
     compat_map as map,
+    compat_open as open,
     compat_os_name,
     compat_str,
     compat_urllib_error,
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index b05d60435..45b1add73 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -7,6 +7,7 @@ import subprocess
 import tempfile
 
 from ..compat import (
+    compat_open as open,
     compat_urlparse,
     compat_kwargs,
 )
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 5e7b6e2df..b6c60e127 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -18,6 +18,8 @@ from ..utils import (
     shell_quote,
 )
 
+from ..compat import compat_open as open
+
 
 class EmbedThumbnailPPError(PostProcessingError):
     pass
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8c29c8d59..801160e6c 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import io
 import os
 import subprocess
 import time
@@ -9,6 +8,7 @@ import re
 
 from .common import AudioConversionError, PostProcessor
 
+from ..compat import compat_open as open
 from ..utils import (
     encodeArgument,
     encodeFilename,
@@ -493,7 +493,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         chapters = info.get('chapters', [])
         if chapters:
             metadata_filename = replace_extension(filename, 'meta')
-            with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+            with open(metadata_filename, 'w', encoding='utf-8') as f:
                 def ffmpeg_escape(text):
                     return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
 
@@ -636,7 +636,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                 with open(dfxp_file, 'rb') as f:
                     srt_data = dfxp2srt(f.read())
 
-                with io.open(srt_file, 'wt', encoding='utf-8') as f:
+                with open(srt_file, 'w', encoding='utf-8') as f:
                     f.write(srt_data)
                 old_file = srt_file
 
@@ -652,7 +652,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
 
             self.run_ffmpeg(old_file, new_file, ['-f', new_format])
 
-            with io.open(new_file, 'rt', encoding='utf-8') as f:
+            with open(new_file, 'r', encoding='utf-8') as f:
                 subs[lang] = {
                     'ext': new_ext,
                     'data': f.read(),
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 84c964617..b5f26e4a9 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -9,7 +9,10 @@ import subprocess
 import sys
 from zipimport import zipimporter
 
-from .compat import compat_realpath
+from .compat import (
+    compat_open as open,
+    compat_realpath,
+)
 from .utils import encode_compat_str
 
 from .version import __version__
@@ -127,7 +130,7 @@ def update_self(to_screen, verbose, opener):
 
         try:
             bat = os.path.join(directory, 'youtube-dl-updater.bat')
-            with io.open(bat, 'w') as batfile:
+            with open(bat, 'w') as batfile:
                 batfile.write('''
 @echo off
 echo Waiting for file handle to be closed ...

From b87018122995acb7e6a1be3f2464605259b93611 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 00:22:54 +0100
Subject: [PATCH 274/312] [build] Extend use of `devscripts/utils`

---
 devscripts/bash-completion.py            | 11 +++++++----
 devscripts/create-github-release.py      |  9 +++++----
 devscripts/fish-completion.py            | 11 ++++++-----
 devscripts/gh-pages/add-version.py       | 15 ++++++++++-----
 devscripts/gh-pages/generate-download.py | 17 ++++++++++++-----
 devscripts/gh-pages/update-copyright.py  | 17 +++++++++++------
 devscripts/gh-pages/update-feed.py       | 11 ++++++++---
 devscripts/gh-pages/update-sites.py      | 11 ++++++-----
 devscripts/make_contributing.py          |  9 ++++-----
 devscripts/make_issue_template.py        | 17 +++++++----------
 devscripts/make_readme.py                | 11 +++++++----
 devscripts/make_supportedsites.py        | 15 ++++++++-------
 devscripts/prepare_manpage.py            | 10 ++++------
 devscripts/zsh-completion.py             |  8 ++++----
 youtube_dl/update.py                     |  1 -
 15 files changed, 99 insertions(+), 74 deletions(-)

diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 3d1391334..7db396a77 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -5,8 +5,12 @@ import os
 from os.path import dirname as dirn
 import sys
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
+from youtube_dl.compat import compat_open as open
+
+from utils import read_file
 
 BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
 BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
@@ -18,9 +22,8 @@ def build_completion(opt_parser):
         for option in group.option_list:
             # for every long flag
             opts_flag.append(option.get_opt_string())
-    with open(BASH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
-    with open(BASH_COMPLETION_FILE, "w") as f:
+    template = read_file(BASH_COMPLETION_TEMPLATE)
+    with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f:
         # just using the special char
         filled_template = template.replace("{{flags}}", " ".join(opts_flag))
         f.write(filled_template)
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 2ddfa1096..320bcfc27 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import json
 import mimetypes
 import netrc
@@ -10,7 +9,9 @@ import os
 import re
 import sys
 
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
 
 from youtube_dl.compat import (
     compat_basestring,
@@ -22,6 +23,7 @@ from youtube_dl.utils import (
     make_HTTPS_handler,
     sanitized_Request,
 )
+from utils import read_file
 
 
 class GitHubReleaser(object):
@@ -89,8 +91,7 @@ def main():
 
     changelog_file, version, build_path = args
 
-    with io.open(changelog_file, encoding='utf-8') as inf:
-        changelog = inf.read()
+    changelog = read_file(changelog_file)
 
     mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
     body = mobj.group(1) if mobj else ''
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 51d19dd33..267ba6a58 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -6,10 +6,13 @@ import os
 from os.path import dirname as dirn
 import sys
 
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
 from youtube_dl.utils import shell_quote
 
+from utils import read_file, write_file
+
 FISH_COMPLETION_FILE = 'youtube-dl.fish'
 FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
 
@@ -38,11 +41,9 @@ def build_completion(opt_parser):
             complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
             commands.append(shell_quote(complete_cmd))
 
-    with open(FISH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
+    template = read_file(FISH_COMPLETION_TEMPLATE)
     filled_template = template.replace('{{commands}}', '\n'.join(commands))
-    with open(FISH_COMPLETION_FILE, 'w') as f:
-        f.write(filled_template)
+    write_file(filled_template)
 
 
 parser = youtube_dl.parseOpts()[0]
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py
index 867ea0048..b84908f85 100755
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -6,16 +6,21 @@ import sys
 import hashlib
 import os.path
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl.compat import compat_open as open
 
 if len(sys.argv) <= 1:
     print('Specify the version number as parameter')
     sys.exit()
 version = sys.argv[1]
 
-with open('update/LATEST_VERSION', 'w') as f:
-    f.write(version)
+write_file('update/LATEST_VERSION', version)
 
-versions_info = json.load(open('update/versions.json'))
+versions_info = json.loads(read_file('update/versions.json'))
 if 'signature' in versions_info:
     del versions_info['signature']
 
@@ -39,5 +44,5 @@ for key, filename in filenames.items():
 versions_info['versions'][version] = new_version
 versions_info['latest'] = version
 
-with open('update/versions.json', 'w') as jsonf:
-    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
+with open('update/versions.json', 'w', encoding='utf-8') as jsonf:
+    json.dumps(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py
index a873d32ee..3e38e9299 100755
--- a/devscripts/gh-pages/generate-download.py
+++ b/devscripts/gh-pages/generate-download.py
@@ -2,14 +2,21 @@
 from __future__ import unicode_literals
 
 import json
+import os.path
+import sys
 
-versions_info = json.load(open('update/versions.json'))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+from utils import read_file, write_file
+
+versions_info = json.loads(read_file('update/versions.json'))
 version = versions_info['latest']
 version_dict = versions_info['versions'][version]
 
 # Read template page
-with open('download.html.in', 'r', encoding='utf-8') as tmplf:
-    template = tmplf.read()
+template = read_file('download.html.in')
 
 template = template.replace('@PROGRAM_VERSION@', version)
 template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
@@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0])
 template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
 template = template.replace('@TAR_URL@', version_dict['tar'][0])
 template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
-with open('download.html', 'w', encoding='utf-8') as dlf:
-    dlf.write(template)
+
+write_file('download.html', template)
diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py
index 61487f925..444595c48 100755
--- a/devscripts/gh-pages/update-copyright.py
+++ b/devscripts/gh-pages/update-copyright.py
@@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals
 
 import datetime
 import glob
-import io  # For Python 2 compatibility
 import os
 import re
+import sys
 
-year = str(datetime.datetime.now().year)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl import compat_str
+
+year = compat_str(datetime.datetime.now().year)
 for fn in glob.glob('*.html*'):
-    with io.open(fn, encoding='utf-8') as f:
-        content = f.read()
+    content = read_file(fn)
     newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
     if content != newc:
         tmpFn = fn + '.part'
-        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
-            outf.write(newc)
+        write_file(tmpFn, newc)
         os.rename(tmpFn, fn)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index 506a62377..13a367d34 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -2,10 +2,16 @@
 from __future__ import unicode_literals
 
 import datetime
-import io
 import json
+import os.path
 import textwrap
+import sys
 
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import write_file
 
 atom_template = textwrap.dedent("""\
     <?xml version="1.0" encoding="utf-8"?>
@@ -72,5 +78,4 @@ for v in versions:
 entries_str = textwrap.indent(''.join(entries), '\t')
 atom_template = atom_template.replace('@ENTRIES@', entries_str)
 
-with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
-    atom_file.write(atom_template)
+write_file('update/releases.atom', atom_template)
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 531c93c70..06a8a474c 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -5,15 +5,17 @@ import sys
 import os
 import textwrap
 
+dirn = os.path.dirname
+
 # We must be able to import youtube_dl
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
 
 import youtube_dl
+from devscripts.utils import read_file, write_file
 
 
 def main():
-    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
-        template = tmplf.read()
+    template = read_file('supportedsites.html.in')
 
     ie_htmls = []
     for ie in youtube_dl.list_extractors(age_limit=None):
@@ -29,8 +31,7 @@ def main():
 
     template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
 
-    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
-        sitesf.write(template)
+    write_file('supportedsites.html', template)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 226d1a5d6..5a9eb194f 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
 import re
 
+from utils import read_file, write_file
+
 
 def main():
     parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
@@ -14,8 +15,7 @@ def main():
 
     infile, outfile = args
 
-    with io.open(infile, encoding='utf-8') as inf:
-        readme = inf.read()
+    readme = read_file(infile)
 
     bug_text = re.search(
         r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
@@ -25,8 +25,7 @@ def main():
 
     out = bug_text + dev_text
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
index b7ad23d83..65fa8169f 100644
--- a/devscripts/make_issue_template.py
+++ b/devscripts/make_issue_template.py
@@ -1,8 +1,11 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
+import os.path
+import sys
+
+from utils import read_file, read_version, write_file
 
 
 def main():
@@ -13,17 +16,11 @@ def main():
 
     infile, outfile = args
 
-    with io.open(infile, encoding='utf-8') as inf:
-        issue_template_tmpl = inf.read()
+    issue_template_tmpl = read_file(infile)
 
-    # Get the version from youtube_dl/version.py without importing the package
-    exec(compile(open('youtube_dl/version.py').read(),
-                 'youtube_dl/version.py', 'exec'))
+    out = issue_template_tmpl % {'version': read_version()}
 
-    out = issue_template_tmpl % {'version': locals()['__version__']}
-
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 if __name__ == '__main__':
     main()
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index c5d5dd4f1..7a5b04dcc 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,9 +1,13 @@
 from __future__ import unicode_literals
 
-import io
-import sys
+import os.path
 import re
+import sys
+dirn = os.path.dirname
 
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import read_file
 from youtube_dl.compat import compat_open as open
 
 README_FILE = 'README.md'
@@ -12,8 +16,7 @@ helptext = sys.stdin.read()
 if isinstance(helptext, bytes):
     helptext = helptext.decode('utf-8')
 
-with io.open(README_FILE, encoding='utf-8') as f:
-    oldreadme = f.read()
+oldreadme = read_file(README_FILE)
 
 header = oldreadme[:oldreadme.index('# OPTIONS')]
 footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 764795bc5..c424d18d7 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,17 +1,19 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import io
 import optparse
-import os
+import os.path
 import sys
 
-
 # Import youtube_dl
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
 import youtube_dl
 
+from utils import write_file
+
 
 def main():
     parser = optparse.OptionParser(usage='%prog OUTFILE.md')
@@ -38,8 +40,7 @@ def main():
         ' - ' + md + '\n'
         for md in gen_ies_md(ies))
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    write_file(outfile, out)
 
 
 if __name__ == '__main__':
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 76bf873e1..0090ada3e 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-import io
 import optparse
 import os.path
 import re
 
+from utils import read_file, write_file
+
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 README_FILE = os.path.join(ROOT_DIR, 'README.md')
-
 PREFIX = r'''%YOUTUBE-DL(1)
 
 # NAME
@@ -29,8 +29,7 @@ def main():
 
     outfile, = args
 
-    with io.open(README_FILE, encoding='utf-8') as f:
-        readme = f.read()
+    readme = read_file(README_FILE)
 
     readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
     readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
@@ -38,8 +37,7 @@ def main():
 
     readme = filter_options(readme)
 
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(readme)
+    write_file(outfile, readme)
 
 
 def filter_options(readme):
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 60aaf76cc..ebd552fcb 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -7,6 +7,8 @@ import sys
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 import youtube_dl
+from utils import read_file, write_file
+
 
 ZSH_COMPLETION_FILE = "youtube-dl.zsh"
 ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
@@ -34,15 +36,13 @@ def build_completion(opt_parser):
 
     flags = [opt.get_opt_string() for opt in opts]
 
-    with open(ZSH_COMPLETION_TEMPLATE) as f:
-        template = f.read()
+    template = read_file(ZSH_COMPLETION_TEMPLATE)
 
     template = template.replace("{{fileopts}}", "|".join(fileopts))
     template = template.replace("{{diropts}}", "|".join(diropts))
     template = template.replace("{{flags}}", " ".join(flags))
 
-    with open(ZSH_COMPLETION_FILE, "w") as f:
-        f.write(template)
+    write_file(ZSH_COMPLETION_FILE, template)
 
 
 parser = youtube_dl.parseOpts()[0]
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index b5f26e4a9..a147b5253 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import io
 import json
 import traceback
 import hashlib

From 0861812d7208310a03909502b1610f5e89d04401 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 25 Jul 2023 15:11:15 +0100
Subject: [PATCH 275/312] [build] Fix typo in `devscripts/fish-completion.py`
 (fix 2285605)

---
 devscripts/fish-completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 267ba6a58..ef8a39e0b 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -43,7 +43,7 @@ def build_completion(opt_parser):
 
     template = read_file(FISH_COMPLETION_TEMPLATE)
     filled_template = template.replace('{{commands}}', '\n'.join(commands))
-    write_file(filled_template)
+    write_file(FISH_COMPLETION_FILE, filled_template)
 
 
 parser = youtube_dl.parseOpts()[0]

From 87e578c9b891b29ab6559ac81ed391897b1e1ace Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 10:52:04 +0100
Subject: [PATCH 276/312] [workflows/ci.yml] Update to setup-java@v3

* avoid Node 12 deprecation
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6b91edd6c..a73bedae1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -278,7 +278,7 @@ jobs:
     #-------- Jython ------
     - name: Set up Java 8
       if: ${{ matrix.python-impl == 'jython' }}
-      uses: actions/setup-java@v2
+      uses: actions/setup-java@v3
       with:
         java-version: 8
         distribution: 'zulu'

From e7926ae9f4e5fa258696551a39295402819280c9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 06:03:14 +0100
Subject: [PATCH 277/312] [utils] Rework decoding of `Content-Encoding`s

* support nested encodings
* support optional `br` encoding, if brotli package is installed
* support optional 'compress' encoding, if ncompress package is installed
* response `Content-Encoding` has only unprocessed encodings, or removed
* response `Content-Length` is decoded length (usable for filesize metadata)
* use zlib for both deflate and gzip decompression
* some elements taken from yt-dlp: thx especially coletdjnz
---
 test/test_http.py    |  16 ++----
 youtube_dl/compat.py |  14 +++++
 youtube_dl/utils.py  | 120 ++++++++++++++++++++++++++++++++-----------
 3 files changed, 107 insertions(+), 43 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 89580969d..793bea359 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -461,33 +461,23 @@ class TestHTTP(unittest.TestCase):
                 sanitized_Request(
                     self._test_url('content-encoding'),
                     headers={'ytdl-encoding': encoding}))
-            self.assertEqual(res.headers.get('Content-Encoding'), encoding)
+            # decoded encodings are removed: only check for valid decompressed data
             self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
 
     @unittest.skipUnless(brotli, 'brotli support is not installed')
-    @unittest.expectedFailure
     def test_brotli(self):
         self.__test_compression('br')
 
-    @unittest.expectedFailure
     def test_deflate(self):
         self.__test_compression('deflate')
 
-    @unittest.expectedFailure
     def test_gzip(self):
         self.__test_compression('gzip')
 
-    @unittest.expectedFailure  # not yet implemented
     def test_multiple_encodings(self):
         # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
-        with FakeYDL() as ydl:
-            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
-                res = ydl.urlopen(
-                    sanitized_Request(
-                        self._test_url('content-encoding'),
-                        headers={'ytdl-encoding': pair}))
-                self.assertEqual(res.headers.get('Content-Encoding'), pair)
-                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+        for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+            self.__test_compression(pair)
 
     def test_unsupported_encoding(self):
         # it should return the raw content
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index da6d70ec4..54ad64674 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -3200,6 +3200,18 @@ except AttributeError:
     def compat_datetime_timedelta_total_seconds(td):
         return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
 
+# optional decompression packages
+# PyPi brotli package implements 'br' Content-Encoding
+try:
+    import brotli as compat_brotli
+except ImportError:
+    compat_brotli = None
+# PyPi ncompress package implements 'compress' Content-Encoding
+try:
+    import ncompress as compat_ncompress
+except ImportError:
+    compat_ncompress = None
+
 
 legacy = [
     'compat_HTMLParseError',
@@ -3234,6 +3246,7 @@ __all__ = [
     'compat_Struct',
     'compat_base64_b64decode',
     'compat_basestring',
+    'compat_brotli',
     'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
@@ -3259,6 +3272,7 @@ __all__ = [
     'compat_itertools_zip_longest',
     'compat_kwargs',
     'compat_map',
+    'compat_ncompress',
     'compat_numeric_types',
     'compat_open',
     'compat_ord',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b5475434f..e73291107 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -15,7 +15,6 @@ import email.utils
 import email.header
 import errno
 import functools
-import gzip
 import inspect
 import io
 import itertools
@@ -42,6 +41,7 @@ from .compat import (
     compat_HTMLParseError,
     compat_HTMLParser,
     compat_basestring,
+    compat_brotli as brotli,
     compat_casefold,
     compat_chr,
     compat_collections_abc,
@@ -55,6 +55,7 @@ from .compat import (
     compat_http_client,
     compat_integer_types,
     compat_kwargs,
+    compat_ncompress as ncompress,
     compat_os_name,
     compat_re_Match,
     compat_re_Pattern,
@@ -2638,11 +2639,44 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             req)
 
     @staticmethod
-    def deflate(data):
+    def deflate_gz(data):
         try:
-            return zlib.decompress(data, -zlib.MAX_WBITS)
+            # format:zlib,gzip + windowsize:32768
+            return data and zlib.decompress(data, 32 + zlib.MAX_WBITS)
         except zlib.error:
-            return zlib.decompress(data)
+            # raw zlib * windowsize:32768 (RFC 9110: "non-conformant")
+            return zlib.decompress(data, -zlib.MAX_WBITS)
+
+    @staticmethod
+    def gzip(data):
+
+        from gzip import GzipFile
+
+        def _gzip(data):
+            with io.BytesIO(data) as data_buf:
+                gz = GzipFile(fileobj=data_buf, mode='rb')
+                return gz.read()
+
+        try:
+            return _gzip(data)
+        except IOError as original_ioerror:
+            # There may be junk at the end of the file
+            # See http://stackoverflow.com/q/4928560/35070 for details
+            for i in range(1, 1024):
+                try:
+                    return _gzip(data[:-i])
+                except IOError:
+                    continue
+            else:
+                raise original_ioerror
+
+    @staticmethod
+    def brotli(data):
+        return data and brotli.decompress(data)
+
+    @staticmethod
+    def compress(data):
+        return data and ncompress.decompress(data)
 
     def http_request(self, req):
         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
@@ -2679,33 +2713,59 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     def http_response(self, req, resp):
         old_resp = resp
-        # gzip
-        if resp.headers.get('Content-encoding', '') == 'gzip':
-            content = resp.read()
-            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
-            try:
-                uncompressed = io.BytesIO(gz.read())
-            except IOError as original_ioerror:
-                # There may be junk at the end of the file
-                # See http://stackoverflow.com/q/4928560/35070 for details
-                for i in range(1, 1024):
-                    try:
-                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
-                        uncompressed = io.BytesIO(gz.read())
-                    except IOError:
-                        continue
-                    break
-                else:
-                    raise original_ioerror
-            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+
+        # Content-Encoding header lists the encodings in order that they were applied [1].
+        # To decompress, we simply do the reverse.
+        # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
+        decoded_response = None
+        decoders = {
+            'gzip': self.deflate_gz,
+            'deflate': self.deflate_gz,
+        }
+        if brotli:
+            decoders['br'] = self.brotli
+        if ncompress:
+            decoders['compress'] = self.compress
+        if sys.platform.startswith('java'):
+            # Jython zlib implementation misses gzip
+            decoders['gzip'] = self.gzip
+
+        def encodings(hdrs):
+            # A header field that allows multiple values can have multiple instances [2].
+            # [2]: https://datatracker.ietf.org/doc/html/rfc9110#name-fields
+            for e in reversed(','.join(hdrs).split(',')):
+                if e:
+                    yield e.strip()
+
+        encodings_left = []
+        try:
+            resp.headers.get_all
+            hdrs = resp.headers
+        except AttributeError:
+            # Py2 has no get_all() method: headers are rfc822.Message
+            from email.message import Message
+            hdrs = Message()
+            for k, v in resp.headers.items():
+                hdrs[k] = v
+
+        decoder, decoded_response = True, None
+        for encoding in encodings(hdrs.get_all('Content-Encoding', [])):
+            # "SHOULD consider" x-compress, x-gzip as compress, gzip
+            decoder = decoder and decoders.get(remove_start(encoding, 'x-'))
+            if not decoder:
+                encodings_left.insert(0, encoding)
+                continue
+            decoded_response = decoder(decoded_response or resp.read())
+        if decoded_response is not None:
+            resp = compat_urllib_request.addinfourl(
+                io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
-            del resp.headers['Content-encoding']
-        # deflate
-        if resp.headers.get('Content-encoding', '') == 'deflate':
-            gz = io.BytesIO(self.deflate(resp.read()))
-            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
-            resp.msg = old_resp.msg
-            del resp.headers['Content-encoding']
+            del resp.headers['Content-Length']
+            resp.headers['Content-Length'] = '%d' % len(decoded_response)
+        del resp.headers['Content-Encoding']
+        if encodings_left:
+            resp.headers['Content-Encoding'] = ', '.join(encodings_left)
+
         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
         # https://github.com/ytdl-org/youtube-dl/issues/6457).
         if 300 <= resp.code < 400:

From abef53466da1f7d2e79f5644718a2cf7524abc49 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 28 Jul 2023 06:19:15 +0100
Subject: [PATCH 278/312] [utils] Rework URL path munging for ., .. components

* move processing to YoutubeDLHandler
* also process `Location` header for redirect
* use tests from https://github.com/yt-dlp/yt-dlp/pull/7662
---
 test/test_http.py       | 14 +++++++++
 test/test_utils.py      | 29 ++++++++++++++++-
 youtube_dl/YoutubeDL.py | 23 --------------
 youtube_dl/utils.py     | 70 +++++++++++++++++++++++++++++++----------
 4 files changed, 95 insertions(+), 41 deletions(-)

diff --git a/test/test_http.py b/test/test_http.py
index 793bea359..485c4c6fc 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -180,6 +180,12 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             respond()
         elif self.path == '/%c7%9f':
             respond()
+        elif self.path == '/redirect_dotsegments':
+            self.send_response(301)
+            # redirect to /headers but with dot segments before
+            self.send_header('Location', '/a/b/./../../headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path.startswith('/redirect_'):
             self._redirect()
         elif self.path.startswith('/method'):
@@ -489,6 +495,14 @@ class TestHTTP(unittest.TestCase):
             self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
             self.assertEqual(res.read(), b'raw')
 
+    def test_remove_dot_segments(self):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))
+            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
+
+            res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))
+            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
+
 
 def _build_proxy_handler(name):
     class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
diff --git a/test/test_utils.py b/test/test_utils.py
index e83977f29..fdae1f744 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -64,6 +64,7 @@ from youtube_dl.utils import (
     parse_age_limit,
     parse_duration,
     parse_filesize,
+    parse_codecs,
     parse_count,
     parse_iso8601,
     parse_resolution,
@@ -114,7 +115,7 @@ from youtube_dl.utils import (
     cli_option,
     cli_valueless_option,
     cli_bool_option,
-    parse_codecs,
+    YoutubeDLHandler,
 )
 from youtube_dl.compat import (
     compat_chr,
@@ -905,6 +906,32 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
+    def test_remove_dot_segments(self):
+
+        def remove_dot_segments(p):
+            q = '' if p.startswith('/') else '/'
+            p = 'http://example.com' + q + p
+            p = compat_urlparse.urlsplit(YoutubeDLHandler._fix_path(p)).path
+            return p[1:] if q else p
+
+        self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+        self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+        self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+        self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+        self.assertEqual(remove_dot_segments('/..'), '/')
+        self.assertEqual(remove_dot_segments('/./'), '/')
+        self.assertEqual(remove_dot_segments('/./a'), '/a')
+        self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+        self.assertEqual(remove_dot_segments('/'), '/')
+        self.assertEqual(remove_dot_segments('/t'), '/t')
+        self.assertEqual(remove_dot_segments('t'), 't')
+        self.assertEqual(remove_dot_segments(''), '')
+        self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+        self.assertEqual(remove_dot_segments('../a'), 'a')
+        self.assertEqual(remove_dot_segments('./a'), 'a')
+        self.assertEqual(remove_dot_segments('.'), '')
+        self.assertEqual(remove_dot_segments('////'), '////')
+
     def test_js_to_json_vars_strings(self):
         self.assertDictEqual(
             json.loads(js_to_json(
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 6a12f91e4..13a41928f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -71,7 +71,6 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
-    HEADRequest,
     int_or_none,
     ISO3166Utils,
     join_nonempty,
@@ -88,7 +87,6 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
-    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2460,27 +2458,6 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
-        # an embedded /../ sequence is not automatically handled by urllib2
-        # see https://github.com/yt-dlp/yt-dlp/issues/3355
-        url = req.get_full_url()
-        parts = url.partition('/../')
-        if parts[1]:
-            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
-        if url:
-            # worse, URL path may have initial /../ against RFCs: work-around
-            # by stripping such prefixes, like eg Firefox
-            parts = compat_urllib_parse.urlsplit(url)
-            path = parts.path
-            while path.startswith('/../'):
-                path = path[3:]
-            url = parts._replace(path=path).geturl()
-            # get a new Request with the munged URL
-            if url != req.get_full_url():
-                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
-                    req.get_method(), compat_urllib_request.Request)
-                req = req_type(
-                    url, data=req.data, headers=dict(req.header_items()),
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e73291107..36204c8fa 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2678,17 +2678,52 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     def compress(data):
         return data and ncompress.decompress(data)
 
+    @staticmethod
+    def _fix_path(url):
+        # an embedded /../ or /./ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        parsed_url = compat_urllib_parse.urlsplit(url)
+        path = parsed_url.path
+        if not path.endswith('/'):
+            path += '/'
+        parts = path.partition('/./')
+        if not parts[1]:
+            parts = path.partition('/../')
+        if parts[1]:
+            path = compat_urllib_parse.urljoin(
+                parts[0] + parts[1][:1],
+                parts[1][1:] + (parts[2] if parsed_url.path.endswith('/') else parts[2][:-1]))
+            url = parsed_url._replace(path=path).geturl()
+        if '/.' in url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            path = parsed_url.path + '/'
+            while path.startswith('/.'):
+                if path.startswith('/../'):
+                    path = path[3:]
+                elif path.startswith('/./'):
+                    path = path[2:]
+                else:
+                    break
+            path = path[:-1]
+            if not path.startswith('/') and parsed_url.path.startswith('/'):
+                path = '/' + path
+            url = parsed_url._replace(path=path).geturl()
+        return url
+
     def http_request(self, req):
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
+        url = req.get_full_url()
+        # resolve embedded . and ..
+        url_fixed = self._fix_path(url)
+        # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not
+        # always respected by websites: some tend to give out URLs with non percent-encoded
         # non-ASCII characters (see telemb.py, ard.py [#3412])
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
         # the code of this workaround has been moved here from YoutubeDL.urlopen()
-        url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = escape_url(url_fixed)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
@@ -2702,10 +2737,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
         req.headers = handle_youtubedl_headers(req.headers)
 
-        if sys.version_info < (2, 7) and '#' in req.get_full_url():
-            # Python 2.6 is brain-dead when it comes to fragments
-            req._Request__original = req._Request__original.partition('#')[0]
-            req._Request__r_type = req._Request__r_type.partition('#')[0]
+        if sys.version_info < (2, 7):
+            # avoid possible race where __r_type may be unset
+            req.get_type()
+            if '#' in req.get_full_url():
+                # Python 2.6 is brain-dead when it comes to fragments
+                req._Request__original = req._Request__original.partition('#')[0]
+                req._Request__r_type = req._Request__r_type.partition('#')[0]
 
         # Use the totally undocumented AbstractHTTPHandler per
         # https://github.com/yt-dlp/yt-dlp/pull/4158
@@ -2775,10 +2813,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 if sys.version_info >= (3, 0):
                     location = location.encode('iso-8859-1')
                 location = location.decode('utf-8')
-                location_escaped = escape_url(location)
+                # resolve embedded . and ..
+                location_fixed = self._fix_path(location)
+                location_escaped = escape_url(location_fixed)
                 if location != location_escaped:
                     del resp.headers['Location']
-                    if sys.version_info < (3, 0):
+                    # if sys.version_info < (3, 0):
+                    if not isinstance(location_escaped, str):
                         location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
@@ -4248,13 +4289,8 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers.update(headers)
     req_data = data if data is not None else req.data
     req_url = update_url_query(url or req.get_full_url(), query)
-    req_get_method = req.get_method()
-    if req_get_method == 'HEAD':
-        req_type = HEADRequest
-    elif req_get_method == 'PUT':
-        req_type = PUTRequest
-    else:
-        req_type = compat_urllib_request.Request
+    req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+        req.get_method(), compat_urllib_request.Request)
     new_req = req_type(
         req_url, data=req_data, headers=req_headers,
         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

From 7d965e6b65655f2a5fbae34219fc87359a3d7061 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:45:57 +0100
Subject: [PATCH 279/312] [utils] Avoid comparing `type(var)`, etc, to pass new
 Linter rules

---
 youtube_dl/swfinterp.py |  2 +-
 youtube_dl/utils.py     | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
index 0c7158575..e79e0b17f 100644
--- a/youtube_dl/swfinterp.py
+++ b/youtube_dl/swfinterp.py
@@ -727,7 +727,7 @@ class SWFInterpreter(object):
                             stack.append(res)
                             continue
 
-                        assert isinstance(obj, (dict, _ScopeDict)),\
+                        assert isinstance(obj, (dict, _ScopeDict)), \
                             'Accessing member %r on %r' % (pname, obj)
                         res = obj.get(pname, undefined)
                         stack.append(res)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 36204c8fa..1da5a7a38 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2235,7 +2235,7 @@ def _htmlentity_transform(entity_with_semicolon):
 def unescapeHTML(s):
     if s is None:
         return None
-    assert type(s) == compat_str
+    assert isinstance(s, compat_str)
 
     return re.sub(
         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
@@ -3418,7 +3418,7 @@ def _windows_write_string(s, out):
 def write_string(s, out=None, encoding=None):
     if out is None:
         out = sys.stderr
-    assert type(s) == compat_str
+    assert isinstance(s, compat_str)
 
     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
         if _windows_write_string(s, out):
@@ -4459,8 +4459,10 @@ TV_PARENTAL_GUIDELINES = {
 
 
 def parse_age_limit(s):
-    if type(s) == int:
-        return s if 0 <= s <= 21 else None
+    if not isinstance(s, bool):
+        age = int_or_none(s)
+        if age is not None:
+            return age if 0 <= age <= 21 else None
     if not isinstance(s, compat_basestring):
         return None
     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)

From 2d2a4bc8324fc4bc5a235cbd1ee0b0769912bfd1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:47:48 +0100
Subject: [PATCH 280/312] [utils] Revise `isinstance()` tests (especially for
 str/unicode/bytes) to complete Linter fix

---
 youtube_dl/compat.py |   2 +-
 youtube_dl/utils.py  | 153 ++++++++++++++++++++-----------------------
 2 files changed, 73 insertions(+), 82 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 54ad64674..3c526a78d 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -36,7 +36,7 @@ try:
     )
 except NameError:
     compat_str, compat_basestring, compat_chr = (
-        str, str, chr
+        str, (str, bytes), chr
     )
 
 # casefold
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1da5a7a38..94b339b1d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1826,11 +1826,11 @@ def write_json_file(obj, fn):
     if sys.version_info < (3, 0) and sys.platform != 'win32':
         encoding = get_filesystem_encoding()
         # os.path.basename returns a bytes object, but NamedTemporaryFile
-        # will fail if the filename contains non ascii characters unless we
+        # will fail if the filename contains non-ascii characters unless we
         # use a unicode object
-        path_basename = lambda f: os.path.basename(fn).decode(encoding)
+        path_basename = lambda f: os.path.basename(f).decode(encoding)
         # the same for os.path.dirname
-        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
     else:
         path_basename = os.path.basename
         path_dirname = os.path.dirname
@@ -1894,10 +1894,10 @@ else:
                 return f
         return None
 
+
 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 # the namespace parameter
 
-
 def xpath_with_ns(path, ns_map):
     components = [c.split(':') for c in path.split('/')]
     replaced = []
@@ -1914,7 +1914,7 @@ def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
     def _find_xpath(xpath):
         return node.find(compat_xpath(xpath))
 
-    if isinstance(xpath, (str, compat_str)):
+    if isinstance(xpath, compat_basestring):
         n = _find_xpath(xpath)
     else:
         for xp in xpath:
@@ -2262,39 +2262,32 @@ def get_subprocess_encoding():
     return encoding
 
 
-def encodeFilename(s, for_subprocess=False):
-    """
-    @param s The name of the file
-    """
+# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+if sys.version_info < (3, 0) and not sys.platform.startswith('java'):
 
-    assert type(s) == compat_str
+    def encodeFilename(s, for_subprocess=False):
+        """
+        @param s The name of the file
+        """
+
+        # Pass '' directly to use Unicode APIs on Windows 2000 and up
+        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+        if (not for_subprocess
+                and sys.platform == 'win32'
+                and sys.getwindowsversion()[0] >= 5
+                and isinstance(s, compat_str)):
+            return s
+
+        return _encode_compat_str(s, get_subprocess_encoding(), 'ignore')
+
+    def decodeFilename(b, for_subprocess=False):
+        return _decode_compat_str(b, get_subprocess_encoding(), 'ignore')
+
+else:
 
     # Python 3 has a Unicode API
-    if sys.version_info >= (3, 0):
-        return s
-
-    # Pass '' directly to use Unicode APIs on Windows 2000 and up
-    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
-    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
-    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        return s
-
-    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
-    if sys.platform.startswith('java'):
-        return s
-
-    return s.encode(get_subprocess_encoding(), 'ignore')
-
-
-def decodeFilename(b, for_subprocess=False):
-
-    if sys.version_info >= (3, 0):
-        return b
-
-    if not isinstance(b, bytes):
-        return b
-
-    return b.decode(get_subprocess_encoding(), 'ignore')
+    encodeFilename = decodeFilename = lambda *s, **k: s[0]
 
 
 def encodeArgument(s):
@@ -2313,11 +2306,7 @@ def decodeArgument(b):
 def decodeOption(optval):
     if optval is None:
         return optval
-    if isinstance(optval, bytes):
-        optval = optval.decode(preferredencoding())
-
-    assert isinstance(optval, compat_str)
-    return optval
+    return _decode_compat_str(optval)
 
 
 def formatSeconds(secs):
@@ -2363,7 +2352,7 @@ def make_HTTPS_handler(params, **kwargs):
 
     if sys.version_info < (3, 2):
         return YoutubeDLHTTPSHandler(params, **kwargs)
-    else:  # Python < 3.4
+    else:  # Python3 < 3.4
         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
@@ -2818,8 +2807,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 location_escaped = escape_url(location_fixed)
                 if location != location_escaped:
                     del resp.headers['Location']
-                    # if sys.version_info < (3, 0):
-                    if not isinstance(location_escaped, str):
+                    if not isinstance(location_escaped, str):  # Py 2 case
                         location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
@@ -3086,8 +3074,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # On python 2 urlh.geturl() may sometimes return redirect URL
         # as a byte string instead of unicode. This workaround forces
         # it to return unicode.
-        if sys.version_info[0] < 3:
-            newurl = compat_str(newurl)
+        newurl = _decode_compat_str(newurl)
 
         # Be conciliant with URIs containing a space.  This is mainly
         # redundant with the more complete encoding done in http_error_302(),
@@ -3333,11 +3320,7 @@ class DateRange(object):
 def platform_name():
     """ Returns the platform name as a compat_str """
     res = platform.platform()
-    if isinstance(res, bytes):
-        res = res.decode(preferredencoding())
-
-    assert isinstance(res, compat_str)
-    return res
+    return _decode_compat_str(res)
 
 
 def _windows_write_string(s, out):
@@ -3567,9 +3550,8 @@ def shell_quote(args):
     quoted_args = []
     encoding = get_filesystem_encoding()
     for a in args:
-        if isinstance(a, bytes):
-            # We may get a filename encoded with 'encodeFilename'
-            a = a.decode(encoding)
+        # We may get a filename encoded with 'encodeFilename'
+        a = _decode_compat_str(a, encoding)
         quoted_args.append(compat_shlex_quote(a))
     return ' '.join(quoted_args)
 
@@ -3733,8 +3715,9 @@ def parse_resolution(s):
 
 
 def parse_bitrate(s):
-    if not isinstance(s, compat_str):
-        return
+    s = txt_or_none(s)
+    if not s:
+        return None
     mobj = re.search(r'\b(\d+)\s*kbps', s)
     if mobj:
         return int(mobj.group(1))
@@ -3822,18 +3805,17 @@ def base_url(url):
 
 
 def urljoin(base, path):
-    if isinstance(path, bytes):
-        path = path.decode('utf-8')
-    if not isinstance(path, compat_str) or not path:
+    path = _decode_compat_str(path, encoding='utf-8', or_none=True)
+    if not path:
         return None
     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
         return path
-    if isinstance(base, bytes):
-        base = base.decode('utf-8')
-    if not isinstance(base, compat_str) or not re.match(
-            r'^(?:https?:)?//', base):
+    base = _decode_compat_str(base, encoding='utf-8', or_none=True)
+    if not base:
         return None
-    return compat_urllib_parse.urljoin(base, path)
+    return (
+        re.match(r'^(?:https?:)?//', base)
+        and compat_urllib_parse.urljoin(base, path))
 
 
 class HEADRequest(compat_urllib_request.Request):
@@ -3998,8 +3980,7 @@ def get_exe_version(exe, args=['--version'],
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
     except OSError:
         return False
-    if isinstance(out, bytes):  # Python 2.x
-        out = out.decode('ascii', 'ignore')
+    out = _decode_compat_str(out, 'ascii', 'ignore')
     return detect_exe_version(out, version_re, unrecognized)
 
 
@@ -4218,8 +4199,8 @@ def lowercase_escape(s):
 
 def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
-    if sys.version_info < (3, 0) and isinstance(s, compat_str):
-        s = s.encode('utf-8')
+    if sys.version_info < (3, 0):
+        s = _encode_compat_str(s, 'utf-8')
     # ensure unicode: after quoting, it can always be converted
     return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
 
@@ -4242,8 +4223,7 @@ def parse_qs(url, **kwargs):
 
 def read_batch_urls(batch_fd):
     def fixup(url):
-        if not isinstance(url, compat_str):
-            url = url.decode('utf-8', 'replace')
+        url = _decode_compat_str(url, 'utf-8', 'replace')
         BOM_UTF8 = '\xef\xbb\xbf'
         if url.startswith(BOM_UTF8):
             url = url[len(BOM_UTF8):]
@@ -4305,10 +4285,8 @@ def _multipart_encode_impl(data, boundary):
     out = b''
     for k, v in data.items():
         out += b'--' + boundary.encode('ascii') + b'\r\n'
-        if isinstance(k, compat_str):
-            k = k.encode('utf-8')
-        if isinstance(v, compat_str):
-            v = v.encode('utf-8')
+        k = _encode_compat_str(k, 'utf-8')
+        v = _encode_compat_str(v, 'utf-8')
         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
@@ -4435,8 +4413,26 @@ def merge_dicts(*dicts, **kwargs):
     return merged
 
 
-def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
-    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+# very poor choice of name, as if Python string encodings weren't confusing enough
+def encode_compat_str(s, encoding=preferredencoding(), errors='strict'):
+    assert isinstance(s, compat_basestring)
+    return s if isinstance(s, compat_str) else compat_str(s, encoding, errors)
+
+
+# what it could have been
+def _decode_compat_str(s, encoding=preferredencoding(), errors='strict', or_none=False):
+    if not or_none:
+        assert isinstance(s, compat_basestring)
+    return (
+        s if isinstance(s, compat_str)
+        else compat_str(s, encoding, errors) if isinstance(s, compat_basestring)
+        else None)
+
+
+# the real encode_compat_str, but only for internal use
+def _encode_compat_str(s, encoding=preferredencoding(), errors='strict'):
+    assert isinstance(s, compat_basestring)
+    return s.encode(encoding, errors) if isinstance(s, compat_str) else s
 
 
 US_RATINGS = {
@@ -4639,12 +4635,7 @@ def args_to_str(args):
 
 
 def error_to_compat_str(err):
-    err_str = str(err)
-    # On python 2 error byte string must be decoded with proper
-    # encoding rather than ascii
-    if sys.version_info[0] < 3:
-        err_str = err_str.decode(preferredencoding())
-    return err_str
+    return _decode_compat_str(str(err))
 
 
 def mimetype2ext(mt):

From e4178b5af3428f29feca622d531090f10f54af35 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:49:58 +0100
Subject: [PATCH 281/312] [utils] Add and use `filter_dict()` from yt-dlp

---
 youtube_dl/utils.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 94b339b1d..c530ed5a2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2586,7 +2586,7 @@ def handle_youtubedl_headers(headers):
     filtered_headers = headers
 
     if 'Youtubedl-no-compression' in filtered_headers:
-        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+        filtered_headers = filter_dict(filtered_headers, cndn=lambda k, _: k.lower() != 'accept-encoding')
         del filtered_headers['Youtubedl-no-compression']
 
     return filtered_headers
@@ -3102,9 +3102,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
             new_data = None
             remove_headers.extend(['Content-Length', 'Content-Type'])
 
-        # NB: don't use dict comprehension for python 2.6 compatibility
-        new_headers = dict((k, v) for k, v in req.headers.items()
-                           if k.title() not in remove_headers)
+        new_headers = filter_dict(req.headers, cndn=lambda k, _: k.title() not in remove_headers)
 
         return compat_urllib_request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,
@@ -4377,6 +4375,11 @@ def try_get(src, getter, expected_type=None):
                 return v
 
 
+def filter_dict(dct, cndn=lambda _, v: v is not None):
+    # NB: don't use dict comprehension for python 2.6 compatibility
+    return dict((k, v) for k, v in dct.items() if cndn(k, v))
+
+
 def merge_dicts(*dicts, **kwargs):
     """
         Merge the `dict`s in `dicts` using the first valid value for each key.

From 2efc8de4d2299e08e0c84d674d7fc7f3fa669487 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 30 Jul 2023 21:50:52 +0100
Subject: [PATCH 282/312] [utils] Advertise optional supported
 `Content-Encoding`s

---
 youtube_dl/utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c530ed5a2..81ff78807 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1678,9 +1678,7 @@ def random_user_agent():
 
 std_headers = {
     'User-Agent': random_user_agent(),
-    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
 }
 
@@ -2724,6 +2722,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             if h.capitalize() not in req.headers:
                 req.add_header(h, v)
 
+        # Similarly, 'Accept-encoding'
+        if 'Accept-encoding' not in req.headers:
+            req.add_header(
+                'Accept-Encoding', join_nonempty(
+                    'gzip', 'deflate', brotli and 'br', ncompress and 'compress',
+                    delim=', '))
+
         req.headers = handle_youtubedl_headers(req.headers)
 
         if sys.version_info < (2, 7):

From 86e3cf5e5849aefcc540c19bb5fa5ab7f470d1c1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Fri, 4 Aug 2023 22:54:12 +0100
Subject: [PATCH 283/312] [S4C] Add extractor for Sianel Pedwar Cymru

* from https://github.com/yt-dlp/yt-dlp/pull/7730, thx ifan-t, bashonly
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/s4c.py        | 76 ++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 youtube_dl/extractor/s4c.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 42b009ef5..cb39876c2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1087,6 +1087,7 @@ from .rutube import (
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
+from .s4c import S4CIE
 from .safari import (
     SafariIE,
     SafariApiIE,
diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py
new file mode 100644
index 000000000..21d40c2d3
--- /dev/null
+++ b/youtube_dl/extractor/s4c.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    merge_dicts,
+    T,
+    traverse_obj,
+    txt_or_none,
+)
+
+
+class S4CIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/programme/861362209',
+        'info_dict': {
+            'id': '861362209',
+            'ext': 'mp4',
+            'title': 'Y Swn',
+            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
+            'duration': 5340
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/programme/856636948',
+        'info_dict': {
+            'id': '856636948',
+            'ext': 'mp4',
+            'title': 'Am Dro',
+            'duration': 2880,
+            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        details = self._download_json(
+            'https://www.s4c.cymru/df/full_prog_details',
+            video_id, query={
+                'lang': 'e',
+                'programme_id': video_id,
+            }, fatal=False)
+
+        filename = self._download_json(
+            'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
+                'programme_id': video_id,
+                'signed': '0',
+                'lang': 'en',
+                'mode': 'od',
+                'appId': 'clic',
+                'streamName': '',
+            }, note='Downloading player config JSON')['filename']
+        m3u8_url = self._download_json(
+            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
+                'mode': 'od',
+                'application': 'clic',
+                'region': 'WW',
+                'extra': 'false',
+                'thirdParty': 'false',
+                'filename': filename,
+            }, note='Downloading streaming urls JSON')['hls']
+        # ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+        formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {}
+
+        return merge_dicts({
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+        }, traverse_obj(details, ('full_prog_details', 0, {
+            'title': (('programme_title', 'series_title'), T(txt_or_none)),
+            'description': ('full_billing', T(txt_or_none)),
+            'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))),
+        }), get_all=False),
+            rev=True)

From 7d58f0769a8f08e46ea77432041577cef94c07e2 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 31 Aug 2023 17:16:47 +0100
Subject: [PATCH 284/312] [ci.yml] Improve conditions for nosetest
 installations

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a73bedae1..7fb8f9f83 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -368,7 +368,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' || matrix.python-version == '3.12' }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }}
       shell: bash
       run: |
         echo "$PATH"
@@ -380,7 +380,7 @@ jobs:
         [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
         $PIP -qq show $nose || $PIP install $nose
     - name: Install nose for other Python 2
-      if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }}
+      if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }}
       shell: bash
       run: |
         # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)

From 31f50c8194f12c27ac6fbfe336f1d515aa8677ae Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 27 Aug 2023 19:08:28 +0100
Subject: [PATCH 285/312] [S4C] Add thumbnail extraction, extract series as
 playlist

Based on https://github.com/yt-dlp/yt-dlp/pull/7776: thx ifan-t, bashonly
---
 youtube_dl/extractor/extractors.py |  5 ++-
 youtube_dl/extractor/s4c.py        | 62 ++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cb39876c2..d9289e5bf 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1087,7 +1087,10 @@ from .rutube import (
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
-from .s4c import S4CIE
+from .s4c import (
+    S4CIE,
+    S4CSeriesIE,
+)
 from .safari import (
     SafariIE,
     SafariApiIE,
diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py
index 21d40c2d3..b152e6680 100644
--- a/youtube_dl/extractor/s4c.py
+++ b/youtube_dl/extractor/s4c.py
@@ -2,6 +2,8 @@
 
 from __future__ import unicode_literals
 
+from functools import partial as partial_f
+
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
@@ -9,6 +11,7 @@ from ..utils import (
     T,
     traverse_obj,
     txt_or_none,
+    url_or_none,
 )
 
 
@@ -21,7 +24,8 @@ class S4CIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Y Swn',
             'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
-            'duration': 5340
+            'duration': 5340,
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
         },
     }, {
         'url': 'https://www.s4c.cymru/clic/programme/856636948',
@@ -31,6 +35,7 @@ class S4CIE(InfoExtractor):
             'title': 'Am Dro',
             'duration': 2880,
             'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
+            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
         },
     }]
 
@@ -43,7 +48,7 @@ class S4CIE(InfoExtractor):
                 'programme_id': video_id,
             }, fatal=False)
 
-        filename = self._download_json(
+        player_config = self._download_json(
             'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
                 'programme_id': video_id,
                 'signed': '0',
@@ -51,7 +56,8 @@ class S4CIE(InfoExtractor):
                 'mode': 'od',
                 'appId': 'clic',
                 'streamName': '',
-            }, note='Downloading player config JSON')['filename']
+            }, note='Downloading player config JSON')
+
         m3u8_url = self._download_json(
             'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
                 'mode': 'od',
@@ -59,18 +65,60 @@ class S4CIE(InfoExtractor):
                 'region': 'WW',
                 'extra': 'false',
                 'thirdParty': 'false',
-                'filename': filename,
+                'filename': player_config['filename'],
             }, note='Downloading streaming urls JSON')['hls']
-        # ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
-        formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {}
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
+            subtitles.setdefault(sub.get('3', 'en'), []).append({
+                'url': sub['0'],
+                'name': sub.get('1'),
+            })
 
         return merge_dicts({
             'id': video_id,
             'formats': formats,
             'subtitles': subtitles,
+            'thumbnail': url_or_none(player_config.get('poster')),
         }, traverse_obj(details, ('full_prog_details', 0, {
             'title': (('programme_title', 'series_title'), T(txt_or_none)),
             'description': ('full_billing', T(txt_or_none)),
-            'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))),
+            'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
         }), get_all=False),
             rev=True)
+
+
+class S4CSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.s4c.cymru/clic/series/864982911',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': '864982911',
+            'title': 'Iaith ar Daith',
+        },
+    }, {
+        'url': 'https://www.s4c.cymru/clic/series/866852587',
+        'playlist_mincount': 8,
+        'info_dict': {
+            'id': '866852587',
+            'title': 'FFIT Cymru',
+        },
+    }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        series_details = self._download_json(
+            'https://www.s4c.cymru/df/series_details', series_id, query={
+                'lang': 'e',
+                'series_id': series_id,
+                'show_prog_in_series': 'Y'
+            }, note='Downloading series details JSON')
+
+        return self.playlist_result(
+            (self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id)
+             for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))),
+            playlist_id=series_id, playlist_title=traverse_obj(
+                series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none))))

From 21caaf23800c95451cec27dfac56df2c0f8de85a Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 01:13:40 +0100
Subject: [PATCH 286/312] [test] Remove redundancy from lambda expected value
 regex

---
 test/helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/helper.py b/test/helper.py
index fc55c6b46..5b7e3dfe2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -142,7 +142,7 @@ def expect_value(self, got, expected, field):
         self.assertTrue(
             contains_str in got,
             'field %s (value: %r) should contain %r' % (field, got, contains_str))
-    elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
+    elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
         fn = eval(expected)
         suite = expected.split(':', 1)[1].strip()
         self.assertTrue(

From bbd3e7e9999877104e1e47a8ed49f3b90257f083 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 01:18:22 +0100
Subject: [PATCH 287/312] [utils] Properly handle list values in update_url()

An actual list value in a query update could have been treated
as a list of values because of the key:list parse_qs format.
---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 81ff78807..fdf41b025 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4257,7 +4257,7 @@ def update_url(url, **kwargs):
     query = kwargs.pop('query_update', None)
     if query:
         qs = compat_parse_qs(url.query)
-        qs.update(query)
+        qs.update((k, [v]) for k, v in query.items())
         kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
         kwargs = compat_kwargs(kwargs)
     return compat_urllib_parse.urlunparse(url._replace(**kwargs))

From 66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 3 Sep 2023 23:15:19 +0100
Subject: [PATCH 288/312] [utils] Revert bbd3e7e, updating docstring, test
 instead

---
 test/test_utils.py  | 46 ++++++++++++++++++++++-----------------------
 youtube_dl/utils.py |  3 ++-
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index fdae1f744..102420fcb 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -62,13 +62,14 @@ from youtube_dl.utils import (
     OnDemandPagedList,
     orderedSet,
     parse_age_limit,
+    parse_bitrate,
     parse_duration,
     parse_filesize,
     parse_codecs,
     parse_count,
     parse_iso8601,
     parse_resolution,
-    parse_bitrate,
+    parse_qs,
     pkcs1pad,
     prepend_extension,
     read_batch_urls,
@@ -125,7 +126,6 @@ from youtube_dl.compat import (
     compat_setenv,
     compat_str,
     compat_urlparse,
-    compat_parse_qs,
 )
 
 
@@ -683,38 +683,36 @@ class TestUtil(unittest.TestCase):
         self.assertTrue(isinstance(data, bytes))
 
     def test_update_url_query(self):
-        def query_dict(url):
-            return compat_parse_qs(compat_urlparse.urlparse(url).query)
-        self.assertEqual(query_dict(update_url_query(
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
-            query_dict('http://example.com/path?quality=HD&format=mp4'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
-            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': 'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path?manifest=f4m', {'manifest': []})),
-            query_dict('http://example.com/path'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
-            query_dict('http://example.com/path?system=LINUX'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?system=LINUX'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'fields': b'id,formats,subtitles'})),
-            query_dict('http://example.com/path?fields=id,formats,subtitles'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'width': 1080, 'height': 720})),
-            query_dict('http://example.com/path?width=1080&height=720'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'bitrate': 5020.43})),
-            query_dict('http://example.com/path?bitrate=5020.43'))
-        self.assertEqual(query_dict(update_url_query(
+            parse_qs('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(parse_qs(update_url_query(
             'http://example.com/path', {'test': '第二行тест'})),
-            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+            parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
 
     def test_multipart_encode(self):
         self.assertEqual(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index fdf41b025..443d2609c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4248,6 +4248,7 @@ def update_url(url, **kwargs):
        url: compat_str or parsed URL tuple
        if query_update is in kwargs, update query with
        its value instead of replacing (overrides any `query`)
+       NB: query_update expects parse_qs() format: [key: value_list, ...]
        returns: compat_str
     """
     if not kwargs:
@@ -4257,7 +4258,7 @@ def update_url(url, **kwargs):
     query = kwargs.pop('query_update', None)
     if query:
         qs = compat_parse_qs(url.query)
-        qs.update((k, [v]) for k, v in query.items())
+        qs.update(query)
         kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
         kwargs = compat_kwargs(kwargs)
     return compat_urllib_parse.urlunparse(url._replace(**kwargs))

From 00ef748cc0e35ee60efd0f7a00e373ab8d1af86b Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 24 Sep 2023 22:00:13 +0100
Subject: [PATCH 289/312] [downloader] Fix baa6c5e: show ETA of http download
 as ETA instead of total d/l time

---
 youtube_dl/downloader/common.py | 2 +-
 youtube_dl/downloader/http.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index afb4ee33d..91e691776 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -96,7 +96,7 @@ class FileDownloader(object):
                 return None
             return int(float(remaining) / rate)
         start, now = (start_or_rate, now_or_remaining)
-        total, current = args
+        total, current = args[:2]
         if total is None:
             return None
         if now is None:
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 28a49b9e8..3cad87420 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -294,7 +294,7 @@ class HttpFD(FileDownloader):
 
                 # Progress message
                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
 
                 self._hook_progress({
                     'status': 'downloading',

From b7fca0fab36c71fee02d6ecf81acbbaa46942be4 Mon Sep 17 00:00:00 2001
From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com>
Date: Wed, 15 Nov 2023 18:54:31 -0500
Subject: [PATCH 290/312] [Youtube] Update consent cookie handling to match
 site

Apologies for force push!
[skip ci]
---
 youtube_dl/extractor/youtube.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9c419c002..3bf483c1c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -260,16 +260,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         cookies = self._get_cookies('https://www.youtube.com/')
         if cookies.get('__Secure-3PSID'):
             return
-        consent_id = None
-        consent = cookies.get('CONSENT')
-        if consent:
-            if 'YES' in consent.value:
-                return
-            consent_id = self._search_regex(
-                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
-        if not consent_id:
-            consent_id = random.randint(100, 999)
-        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+        socs = cookies.get('SOCS')
+        if socs and not socs.value.startswith('CAA'):  # not consented
+            return
+        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
 
     def _real_initialize(self):
         self._initialize_consent()

From 4e115e18cbb02ecde30edb736a030cf84bf813e9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 18 Oct 2023 14:28:10 +0100
Subject: [PATCH 291/312] [workflows/ci.yml] Run apt-get update before
 installing

---
 .github/workflows/ci.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7fb8f9f83..f00fd0c6b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -121,6 +121,12 @@ jobs:
           ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download'  || 'nodownload' }}
           run-tests-ext: sh
     steps:
+    - name: Prepare Linux
+      if: ${{ startswith(matrix.os, 'ubuntu') }}
+      shell: bash
+      run: |
+        # apt in runner, if needed, may not be up-to-date
+        sudo apt-get update
     - name: Checkout
       uses: actions/checkout@v3
     #-------- Python 3 -----
@@ -128,6 +134,7 @@ jobs:
       id: setup-python
       if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
       # wrap broken actions/setup-python@v4
+      # NB may run apt-get install in Linux
       uses: ytdl-org/setup-python@v1
       with:
         python-version: ${{ matrix.python-version }}

From 8d227cb97b00a36fa9389bcba2a63ef6db3dbff7 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 16:17:07 +0000
Subject: [PATCH 292/312] [workflows/ci.yml] Actually use default values for
 push and pull_request

---
 .github/workflows/ci.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f00fd0c6b..ca52e0e43 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ env:
 
 on:
   push:
+    # push inputs aren't known to GitHub
     inputs:
       cpython-versions:
         type: string
@@ -17,6 +18,7 @@ on:
         type: string
         default: core
   pull_request:
+    # pull_request inputs aren't known to GitHub
     inputs:
       cpython-versions:
         type: string
@@ -56,6 +58,23 @@ jobs:
       test-set: ${{ steps.run.outputs.test-set }}
       own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
     steps:
+    # push and pull_request inputs aren't known to GitHub (pt3)
+    - name: Set push defaults
+      if: ${{ github.event_name == 'push' }}
+      env:
+        cpython-versions: all
+        test-set: core
+      run: |
+        echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+        echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
+    - name: Get pull_request inputs
+      if: ${{ github.event_name == 'pull_request' }}
+      env:
+        cpython-versions: main
+        test-set: both
+      run: |
+        echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+        echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
     - name: Make version array
       id: run
       run: |
@@ -79,6 +98,7 @@ jobs:
         # versions with a special get-pip.py in a per-version subdirectory
         printf 'own-pip-versions=%s\n' \
           "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
+
   tests:
     name: Run tests
     needs: select

From c6538ed323409707fc73e81fb7c93bc62ad11ac1 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 18:06:40 +0000
Subject: [PATCH 293/312] [workflows/ci.yml] Use setup-python for now released
 Python 3.12

---
 .github/workflows/ci.yml | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca52e0e43..93562afd7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,6 +6,9 @@ env:
   pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
   cpython-versions: main
   test-set: core
+  # Python beta version to be built using pyenv before setup-python support
+  # Must also be included in all-cpython-versions 
+  next: 3.13
 
 on:
   push:
@@ -152,7 +155,7 @@ jobs:
     #-------- Python 3 -----
     - name: Set up supported Python ${{ matrix.python-version }}
       id: setup-python
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }}
       # wrap broken actions/setup-python@v4
       # NB may run apt-get install in Linux
       uses: ytdl-org/setup-python@v1
@@ -191,23 +194,23 @@ jobs:
             'import sys' \
             'print(sys.path)' \
             | ${expected} -
-    #-------- Python 3.12 -
-    - name: Set up CPython 3.12 environment
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+    #-------- Python next (was 3.12) -
+    - name: Set up CPython 3.next environment
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       shell: bash
       run: |
         PYENV_ROOT=$HOME/.local/share/pyenv
         echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
-    - name: Cache Python 3.12
-      id: cache312
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+    - name: Cache Python 3.next 
+      id: cachenext
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       uses: actions/cache@v3
       with:
-        key: python-3.12
+        key: python-${{ env.next }}
         path: |
           ${{ env.PYENV_ROOT }}
-    - name: Build and set up Python 3.12
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
+    - name: Build and set up Python 3.next
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }}
       # dl and build locally
       shell: bash
       run: |
@@ -219,12 +222,13 @@ jobs:
         export PYENV_ROOT=${{ env.PYENV_ROOT }}
         export PATH=$PYENV_ROOT/bin:$PATH
         git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
-        pyenv install 3.12.0b4
-    - name: Locate Python 3.12
-      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
+        pyenv install ${{ env.next }}
+    - name: Locate Python 3.next
+      if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
       shell: bash
       run: |
-        PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
+        PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)"
+        test -n "$PYTHONHOME"
         echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
         echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
     #-------- Python 2.7 --
@@ -395,7 +399,7 @@ jobs:
         done
     #-------- nose --------
     - name: Install nose for Python ${{ matrix.python-version }}
-      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }}
+      if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }}
       shell: bash
       run: |
         echo "$PATH"

From 427472351ce6b2fcf5bb35dde32bf9ee5beddd89 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 28 Nov 2023 17:26:37 +0000
Subject: [PATCH 294/312] [utils] Make restricted filenames ignore characters
 in Unicode categories Mark, Other

Resolves #32629
---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 443d2609c..61b94d84c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2121,7 +2121,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
             return '_'
         if restricted and ord(char) > 127:
-            return '_'
+            return '' if unicodedata.category(char)[0] in 'CM' else '_'
+
         return char
 
     # Replace look-alike Unicode glyphs

From c62936a5f20d941e67d566e74a7c3fc8d8188f7a Mon Sep 17 00:00:00 2001
From: mimvahedi <61986916+mimvahedi@users.noreply.github.com>
Date: Sat, 2 Dec 2023 18:55:09 +0330
Subject: [PATCH 295/312] [telewebion] Fix extraction (#32634)

* [telewebion] fix extraction

Resolves https://github.com/ytdl-org/youtube-dl/issues/5135#issuecomment-932952119

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/telewebion.py | 47 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py
index 1207b1a1b..30192d74e 100644
--- a/youtube_dl/extractor/telewebion.py
+++ b/youtube_dl/extractor/telewebion.py
@@ -3,17 +3,23 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    url_or_none,
+)
+
 
 class TelewebionIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?telewebion\.com/(episode|clip)/(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
-        'url': 'http://www.telewebion.com/#!/episode/1263668/',
+        'url': 'http://www.telewebion.com/episode/0x1b3139c/',
         'info_dict': {
-            'id': '1263668',
+            'id': '0x1b3139c',
             'ext': 'mp4',
             'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': r're:^https?://static\.telewebion\.com/episodeImages/.*/default',
             'view_count': int,
         },
         'params': {
@@ -25,31 +31,24 @@ class TelewebionIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        secure_token = self._download_webpage(
-            'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
-        episode_details = self._download_json(
-            'http://m.s2.telewebion.com/op/op', video_id,
-            query={'action': 'getEpisodeDetails', 'episode_id': video_id})
+        episode_details = self._download_json('https://gateway.telewebion.ir/kandoo/episode/getEpisodeDetail/?EpisodeId={0}'.format(video_id), video_id)
+        episode_details = episode_details['body']['queryEpisode'][0]
 
-        m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
-            video_id, episode_details['file_path'], secure_token)
+        channel_id = episode_details['channel']['descriptor']
+        episode_image_id = episode_details.get('image')
+        episode_image = 'https://static.telewebion.com/episodeImages/{0}/default'.format(episode_image_id) if episode_image_id else None
+
+        m3u8_url = 'https://cdna.telewebion.com/{0}/episode/{1}/playlist.m3u8'.format(channel_id, video_id)
         formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4', m3u8_id='hls')
-
-        picture_paths = [
-            episode_details.get('picture_path'),
-            episode_details.get('large_picture_path'),
-        ]
-
-        thumbnails = [{
-            'url': picture_path,
-            'preference': idx,
-        } for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
+            m3u8_url, video_id, ext='mp4', m3u8_id='hls',
+            entry_protocol='m3u8_native')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': episode_details['title'],
             'formats': formats,
-            'thumbnails': thumbnails,
-            'view_count': episode_details.get('view_count'),
+            'thumbnail': url_or_none(episode_image),
+            'view_count': int_or_none(episode_details.get('view_count')),
+            'duration': float_or_none(episode_details.get('duration')),
         }

From 55a442adaea1eb3dae332fe00179f6dbd437b398 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Tue, 5 Dec 2023 20:02:30 +0000
Subject: [PATCH 296/312] [Imgur] Overhaul extractor module (#32612)

Revise extractors for new API and page formats
---
 youtube_dl/extractor/imgur.py | 348 +++++++++++++++++++++++++++-------
 1 file changed, 279 insertions(+), 69 deletions(-)

diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index a5ba03efa..59f129d6a 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -1,101 +1,267 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
+    ExtractorError,
+    float_or_none,
     int_or_none,
     js_to_json,
+    merge_dicts,
     mimetype2ext,
-    ExtractorError,
+    parse_iso8601,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
 )
 
 
-class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
+class ImgurBaseIE(InfoExtractor):
+    # hard-coded value, as also used by ArchiveTeam
+    _CLIENT_ID = '546c25a59c58ad7'
+
+    @classmethod
+    def _imgur_result(cls, item_id):
+        return cls.url_result('imgur:%s' % item_id, ImgurIE.ie_key(), item_id)
+
+    def _call_api(self, endpoint, video_id, **kwargs):
+        return self._download_json(
+            'https://api.imgur.com/post/v1/%s/%s?client_id=%s&include=media,account' % (endpoint, video_id, self._CLIENT_ID),
+            video_id, **kwargs)
+
+    @staticmethod
+    def get_description(s):
+        if 'Discover the magic of the internet at Imgur' in s:
+            return None
+        return txt_or_none(s)
+
+
+class ImgurIE(ImgurBaseIE):
+    _VALID_URL = r'''(?x)
+        (?:
+            https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)|
+            imgur:
+        )(?P<id>[a-zA-Z0-9]+)
+    '''
 
     _TESTS = [{
-        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'url': 'https://imgur.com/A61SaA1',
         'info_dict': {
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'timestamp': 1416446068,
+            'upload_date': '20141120',
         },
     }, {
-        'url': 'https://imgur.com/A61SaA1',
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
         'only_matching': True,
     }, {
         'url': 'https://i.imgur.com/crGpqCV.mp4',
         'only_matching': True,
     }, {
-        # no title
+        # previously, no title
         'url': 'https://i.imgur.com/jxBXAMC.gifv',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'jxBXAMC',
+            'ext': 'mp4',
+            'title': 'Fahaka puffer feeding',
+            'timestamp': 1533835503,
+            'upload_date': '20180809',
+        },
     }]
 
+    def _extract_twitter_formats(self, html, tw_id='twitter', **kwargs):
+        fatal = kwargs.pop('fatal', False)
+        tw_stream = self._html_search_meta('twitter:player:stream', html, fatal=fatal, **kwargs)
+        if not tw_stream:
+            return []
+        ext = mimetype2ext(self._html_search_meta(
+            'twitter:player:stream:content_type', html, default=None))
+        width, height = (int_or_none(self._html_search_meta('twitter:player:' + v, html, default=None))
+                         for v in ('width', 'height'))
+        return [{
+            'format_id': tw_id,
+            'url': tw_stream,
+            'ext': ext or determine_ext(tw_stream),
+            'width': width,
+            'height': height,
+        }]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        data = self._call_api('media', video_id, fatal=False, expected_status=404)
         webpage = self._download_webpage(
-            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
+            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id, fatal=not data) or ''
 
-        width = int_or_none(self._og_search_property(
-            'video:width', webpage, default=None))
-        height = int_or_none(self._og_search_property(
-            'video:height', webpage, default=None))
+        if not traverse_obj(data, ('media', 0, (
+                ('type', T(lambda t: t == 'video' or None)),
+                ('metadata', 'is_animated'))), get_all=False):
+            raise ExtractorError(
+                '%s is not a video or animated image' % video_id,
+                expected=True)
+
+        media_fmt = traverse_obj(data, ('media', 0, {
+            'url': ('url', T(url_or_none)),
+            'ext': 'ext',
+            'width': ('width', T(int_or_none)),
+            'height': ('height', T(int_or_none)),
+            'filesize': ('size', T(int_or_none)),
+            'acodec': ('metadata', 'has_sound', T(lambda b: None if b else 'none')),
+        }))
+
+        media_url = traverse_obj(media_fmt, 'url')
+        if media_url:
+            if not media_fmt.get('ext'):
+                media_fmt['ext'] = mimetype2ext(traverse_obj(
+                    data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
+            if traverse_obj(data, ('media', 0, 'type')) == 'image':
+                media_fmt['acodec'] = 'none'
+                media_fmt.setdefault('preference', -10)
+
+        tw_formats = self._extract_twitter_formats(webpage)
+        if traverse_obj(tw_formats, (0, 'url')) == media_url:
+            tw_formats = []
+        else:
+            # maybe this isn't an animated image/video?
+            self._check_formats(tw_formats, video_id)
 
         video_elements = self._search_regex(
             r'(?s)<div class="video-elements">(.*?)</div>',
             webpage, 'video elements', default=None)
-        if not video_elements:
+        if not (video_elements or tw_formats or media_url):
             raise ExtractorError(
-                'No sources found for video %s. Maybe an image?' % video_id,
+                'No sources found for video %s. Maybe a plain image?' % video_id,
                 expected=True)
 
-        formats = []
-        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
-            formats.append({
-                'format_id': m.group('type').partition('/')[2],
-                'url': self._proto_relative_url(m.group('src')),
-                'ext': mimetype2ext(m.group('type')),
-                'width': width,
-                'height': height,
+        def mung_format(fmt, *extra):
+            fmt.update({
                 'http_headers': {
                     'User-Agent': 'youtube-dl (like wget)',
                 },
             })
+            for d in extra:
+                fmt.update(d)
+            return fmt
 
-        gif_json = self._search_regex(
-            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
-            webpage, 'GIF code', fatal=False)
-        if gif_json:
-            gifd = self._parse_json(
-                gif_json, video_id, transform_source=js_to_json)
-            formats.append({
-                'format_id': 'gif',
-                'preference': -10,
-                'width': width,
-                'height': height,
-                'ext': 'gif',
-                'acodec': 'none',
-                'vcodec': 'gif',
-                'container': 'gif',
-                'url': self._proto_relative_url(gifd['gifUrl']),
-                'filesize': gifd.get('size'),
-                'http_headers': {
-                    'User-Agent': 'youtube-dl (like wget)',
-                },
-            })
+        if video_elements:
+            def og_get_size(media_type):
+                return dict((p, int_or_none(self._og_search_property(
+                    ':'.join((media_type, p)), webpage, default=None)))
+                    for p in ('width', 'height'))
+
+            size = og_get_size('video')
+            if all(v is None for v in size.values()):
+                size = og_get_size('image')
+
+            formats = traverse_obj(
+                re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
+                (Ellipsis, {
+                    'format_id': ('type', T(lambda s: s.partition('/')[2])),
+                    'url': ('src', T(self._proto_relative_url)),
+                    'ext': ('type', T(mimetype2ext)),
+                }, T(lambda f: mung_format(f, size))))
+
+            gif_json = self._search_regex(
+                r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+                webpage, 'GIF code', fatal=False)
+            MUST_BRANCH = (None, T(lambda _: None))
+            formats.extend(traverse_obj(gif_json, (
+                T(lambda j: self._parse_json(
+                    j, video_id, transform_source=js_to_json, fatal=False)), {
+                        'url': ('gifUrl', T(self._proto_relative_url)),
+                        'filesize': ('size', T(int_or_none)),
+                }, T(lambda f: mung_format(f, size, {
+                    'format_id': 'gif',
+                    'preference': -10,  # gifs are worse than videos
+                    'ext': 'gif',
+                    'acodec': 'none',
+                    'vcodec': 'gif',
+                    'container': 'gif',
+                })), MUST_BRANCH)))
+        else:
+            formats = []
+
+        # maybe add formats from JSON or page Twitter metadata
+        if not any((u == media_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
+            formats.append(mung_format(media_fmt))
+        tw_url = traverse_obj(tw_formats, (0, 'url'))
+        if not any((u == tw_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
+            formats.extend(mung_format(f) for f in tw_formats)
 
         self._sort_formats(formats)
 
-        return {
+        return merge_dicts(traverse_obj(data, {
+            'uploader_id': ('account_id', T(txt_or_none),
+                            T(lambda a: a if int_or_none(a) != 0 else None)),
+            'uploader': ('account', 'username', T(txt_or_none)),
+            'uploader_url': ('account', 'avatar_url', T(url_or_none)),
+            'like_count': ('upvote_count', T(int_or_none)),
+            'dislike_count': ('downvote_count', T(int_or_none)),
+            'comment_count': ('comment_count', T(int_or_none)),
+            'age_limit': ('is_mature', T(lambda x: 18 if x else None)),
+            'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
+            'release_timestamp': ('created_at', T(parse_iso8601)),
+        }, get_all=False), traverse_obj(data, ('media', 0, 'metadata', {
+            'title': ('title', T(txt_or_none)),
+            'description': ('description', T(self.get_description)),
+            'duration': ('duration', T(float_or_none)),
+            'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
+            'release_timestamp': ('created_at', T(parse_iso8601)),
+        })), {
             'id': video_id,
             'formats': formats,
-            'title': self._og_search_title(webpage, default=video_id),
-        }
+            'title': self._og_search_title(webpage, default='Imgur video ' + video_id),
+            'description': self.get_description(self._og_search_description(webpage)),
+            'thumbnail': url_or_none(self._html_search_meta('thumbnailUrl', webpage, default=None)),
+        })
 
 
-class ImgurGalleryIE(InfoExtractor):
+class ImgurGalleryBaseIE(ImgurBaseIE):
+    _GALLERY = True
+
+    def _real_extract(self, url):
+        gallery_id = self._match_id(url)
+
+        data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
+
+        info = traverse_obj(data, {
+            'title': ('title', T(txt_or_none)),
+            'description': ('description', T(self.get_description)),
+        })
+
+        if traverse_obj(data, 'is_album'):
+
+            def yield_media_ids():
+                for m_id in traverse_obj(data, (
+                        'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
+                        'id', T(txt_or_none))):
+                    yield m_id
+
+            # if a gallery with exactly one video, apply album metadata to video
+            media_id = (
+                self._GALLERY
+                and traverse_obj(data, ('image_count', T(lambda c: c == 1)))
+                and next(yield_media_ids(), None))
+
+            if not media_id:
+                result = self.playlist_result(
+                    map(self._imgur_result, yield_media_ids()), gallery_id)
+                result.update(info)
+                return result
+            gallery_id = media_id
+
+        result = self._imgur_result(gallery_id)
+        info['_type'] = 'url_transparent'
+        result.update(info)
+        return result
+
+
+class ImgurGalleryIE(ImgurGalleryBaseIE):
     IE_NAME = 'imgur:gallery'
     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
 
@@ -106,49 +272,93 @@ class ImgurGalleryIE(InfoExtractor):
             'title': 'Adding faces make every GIF better',
         },
         'playlist_count': 25,
+        'skip': 'Zoinks! You\'ve taken a wrong turn.',
     }, {
+        # TODO: static images - replace with animated/video gallery
         'url': 'http://imgur.com/topic/Aww/ll5Vk',
         'only_matching': True,
     }, {
         'url': 'https://imgur.com/gallery/YcAQlkx',
+        'add_ies': ['Imgur'],
         'info_dict': {
             'id': 'YcAQlkx',
             'ext': 'mp4',
             'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
-        }
+            'timestamp': 1358554297,
+            'upload_date': '20130119',
+            'uploader_id': '1648642',
+            'uploader': 'wittyusernamehere',
+        },
     }, {
+        # TODO: static image - replace with animated/video gallery
         'url': 'http://imgur.com/topic/Funny/N8rOudd',
         'only_matching': True,
     }, {
         'url': 'http://imgur.com/r/aww/VQcQPhM',
-        'only_matching': True,
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'VQcQPhM',
+            'ext': 'mp4',
+            'title': 'The boss is here',
+            'timestamp': 1476494751,
+            'upload_date': '20161015',
+            'uploader_id': '19138530',
+            'uploader': 'thematrixcam',
+        },
+    },
+        # from PR #16674
+        {
+        'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
+        'info_dict': {
+            'id': '6lAn9VQ',
+            'title': 'Penguins !',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://imgur.com/t/unmuted/kx2uD3C',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'ZVMv45i',
+            'ext': 'mp4',
+            'title': 'Intruder',
+            'timestamp': 1528129683,
+            'upload_date': '20180604',
+        },
+    }, {
+        'url': 'https://imgur.com/t/unmuted/wXSK0YH',
+        'add_ies': ['Imgur'],
+        'info_dict': {
+            'id': 'JCAP4io',
+            'ext': 'mp4',
+            'title': 're:I got the blues$',
+            'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
+            'timestamp': 1527809525,
+            'upload_date': '20180531',
+        },
     }]
 
-    def _real_extract(self, url):
-        gallery_id = self._match_id(url)
 
-        data = self._download_json(
-            'https://imgur.com/gallery/%s.json' % gallery_id,
-            gallery_id)['data']['image']
-
-        if data.get('is_album'):
-            entries = [
-                self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
-                for image in data['album_images']['images'] if image.get('hash')]
-            return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
-
-        return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
-
-
-class ImgurAlbumIE(ImgurGalleryIE):
+class ImgurAlbumIE(ImgurGalleryBaseIE):
     IE_NAME = 'imgur:album'
     _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
-
+    _GALLERY = False
     _TESTS = [{
+        # TODO: only static images - replace with animated/video gallery
         'url': 'http://imgur.com/a/j6Orj',
+        'only_matching': True,
+    },
+        # from PR #21693
+        {
+        'url': 'https://imgur.com/a/iX265HX',
         'info_dict': {
-            'id': 'j6Orj',
-            'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+            'id': 'iX265HX',
+            'title': 'enen-no-shouboutai'
         },
-        'playlist_count': 12,
+        'playlist_count': 2,
+    }, {
+        'url': 'https://imgur.com/a/8pih2Ed',
+        'info_dict': {
+            'id': '8pih2Ed'
+        },
+        'playlist_mincount': 1,
     }]

From b1bbc1e50277e240419eb1308e444ac8a5da4320 Mon Sep 17 00:00:00 2001
From: Robotix <82544307+realRobotix@users.noreply.github.com>
Date: Wed, 6 Dec 2023 02:17:57 +0100
Subject: [PATCH 297/312] [Epidemic Sound] Add new extractor (#32628)

* Add simple extractor
* Support separate tracks
* Use index as id instead of slug

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/epidemicsound.py | 101 ++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py    |   1 +
 2 files changed, 102 insertions(+)
 create mode 100644 youtube_dl/extractor/epidemicsound.py

diff --git a/youtube_dl/extractor/epidemicsound.py b/youtube_dl/extractor/epidemicsound.py
new file mode 100644
index 000000000..1a52738aa
--- /dev/null
+++ b/youtube_dl/extractor/epidemicsound.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    T,
+    traverse_obj,
+    txt_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class EpidemicSoundIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [{
+        'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
+        'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
+        'info_dict': {
+            'id': '45014',
+            'display_id': 'yFfQVRpSPz',
+            'ext': 'mp3',
+            'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
+            'title': 'Door Knock Door 1',
+            'duration': 1,
+            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
+            'timestamp': 1415320353,
+            'upload_date': '20141107',
+            'age_limit': None,
+            # check that the "best" format was found, since test file MD5 doesn't
+            # distinguish the formats
+            'format': 'full',
+        },
+    }, {
+        'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
+        'md5': 'c82b745890f9baf18dc2f8d568ee3830',
+        'info_dict': {
+            'id': '148700',
+            'display_id': 'mj8GTTwsZd',
+            'ext': 'mp3',
+            'tags': ['liquid drum n bass', 'energetic'],
+            'title': 'Noplace',
+            'duration': 237,
+            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
+            'timestamp': 1694426482,
+            'release_timestamp': 1700535606,
+            'upload_date': '20230911',
+            'age_limit': None,
+            'format': 'full',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json('https://www.epidemicsound.com/json/track/' + video_id, video_id)
+
+        def fmt_or_none(f):
+            if not f.get('format'):
+                f['format'] = f.get('format_id')
+            elif not f.get('format_id'):
+                f['format_id'] = f['format']
+            if not (f['url'] and f['format']):
+                return
+            if f.get('format_note'):
+                f['format_note'] = 'track ID ' + f['format_note']
+            f['preference'] = -1 if f['format'] == 'full' else -2
+            return f
+
+        formats = traverse_obj(json_data, (
+            'stems', T(dict.items), Ellipsis, {
+                'format': (0, T(txt_or_none)),
+                'format_note': (1, 's3TrackId', T(txt_or_none)),
+                'format_id': (1, 'stemType', T(txt_or_none)),
+                'url': (1, 'lqMp3Url', T(url_or_none)),
+            }, T(fmt_or_none)))
+
+        self._sort_formats(formats)
+
+        info = traverse_obj(json_data, {
+            'id': ('id', T(txt_or_none)),
+            'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
+            'title': ('title', T(txt_or_none)),
+            'duration': ('length', T(float_or_none)),
+            'timestamp': ('added', T(unified_timestamp)),
+            'thumbnail': (('imageUrl', 'cover'), T(url_or_none)),
+            'age_limit': ('isExplicit', T(lambda b: 18 if b else None)),
+            'release_timestamp': ('releaseDate', T(unified_timestamp)),
+        }, get_all=False)
+
+        info.update(traverse_obj(json_data, {
+            'categories': ('genres', Ellipsis, 'tag', T(txt_or_none)),
+            'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
+        }))
+
+        info.update({
+            'display_id': video_id,
+            'formats': formats,
+        })
+
+        return info
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d9289e5bf..82221445f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -357,6 +357,7 @@ from .ellentube import (
 from .elpais import ElPaisIE
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
+from .epidemicsound import EpidemicSoundIE
 from .eporner import EpornerIE
 from .eroprofile import EroProfileIE
 from .escapist import EscapistIE

From be008e657d79832642e2158557c899249c9e31cd Mon Sep 17 00:00:00 2001
From: mk-pmb <mk-pmb@users.noreply.github.com>
Date: Wed, 13 Sep 2023 20:57:05 +0200
Subject: [PATCH 298/312] [core] Fix format string injection for metadata JSON
 filename message.

---
 youtube_dl/YoutubeDL.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 13a41928f..6f2aba5ac 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -2635,12 +2635,12 @@ class YoutubeDL(object):
             self.to_screen(msg('[info] %s is already present', label.title()))
             return 'exists'
         else:
-            self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label))
+            self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
             try:
                 write_json_file(self.filter_requested_info(info_dict), infofn)
                 return True
             except (OSError, IOError):
-                self.report_error(msg('Cannot write %s to JSON file ' + infofn, label))
+                self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
                 return
 
     def _write_thumbnails(self, info_dict, filename):

From 66518714169185195a359e173cef73fba31d76b8 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 20 Jan 2024 18:28:52 +0000
Subject: [PATCH 299/312] [compat] Rework compat for `method` parameter of
 `compat_urllib_request.Request` constructor * fixes #32573 * does not break
 `utils.HEADrequest` (eg)

---
 test/test_compat.py  | 14 ++++++++++++++
 youtube_dl/compat.py | 27 +++++++++++++++++----------
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index e233b1ae1..b83c8cb41 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -23,6 +23,7 @@ from youtube_dl.compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlencode,
+    compat_urllib_request,
 )
 
 
@@ -135,6 +136,19 @@ class TestCompat(unittest.TestCase):
         self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
         self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
 
+    def test_compat_urllib_request_Request(self):
+        self.assertEqual(
+            compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(),
+            'PUT')
+
+        class PUTrequest(compat_urllib_request.Request):
+            def get_method(self):
+                return 'PUT'
+
+        self.assertEqual(
+            PUTrequest('http://127.0.0.1').get_method(),
+            'PUT')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 3c526a78d..818ccebd0 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -58,19 +58,26 @@ except ImportError:  # Python 2
 
 # Also fix up lack of method arg in old Pythons
 try:
-    _req = compat_urllib_request.Request
-    _req('http://127.0.0.1', method='GET')
+    type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
 except TypeError:
-    class _request(object):
-        def __new__(cls, url, *args, **kwargs):
-            method = kwargs.pop('method', None)
-            r = _req(url, *args, **kwargs)
-            if method:
-                r.get_method = types.MethodType(lambda _: method, r)
-            return r
+    def _add_init_method_arg(cls):
 
-    compat_urllib_request.Request = _request
+        init = cls.__init__
 
+        def wrapped_init(self, *args, **kwargs):
+            method = kwargs.pop('method', 'GET')
+            init(self, *args, **kwargs)
+            if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
+                # allow instance or its subclass to override get_method()
+                return
+            if self.has_data() and method == 'GET':
+                method = 'POST'
+            self.get_method = types.MethodType(lambda _: method, self)
+
+        cls.__init__ = wrapped_init
+
+    _add_init_method_arg(compat_urllib_request.Request)
+    del _add_init_method_arg
 
 try:
     import urllib.error as compat_urllib_error

From 640d39f03ae80a0b8d0605a711d97c10f6edbd3f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:32:06 +0000
Subject: [PATCH 300/312] [InfoExtractor] Support some warning and
 `._downloader` shortcut methods from yt-dlp

---
 youtube_dl/extractor/common.py | 56 ++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0eca9f844..d33557135 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -596,6 +596,14 @@ class InfoExtractor(object):
         """Sets the downloader for this IE."""
         self._downloader = downloader
 
+    @property
+    def cache(self):
+        return self._downloader.cache
+
+    @property
+    def cookiejar(self):
+        return self._downloader.cookiejar
+
     def _real_initialize(self):
         """Real initialization process. Redefine in subclasses."""
         pass
@@ -942,14 +950,47 @@ class InfoExtractor(object):
             else:
                 self.report_warning(errmsg + str(ve))
 
-    def report_warning(self, msg, video_id=None):
+    def __ie_msg(self, *msg):
+        return '[{0}] {1}'.format(self.IE_NAME, ''.join(msg))
+
+    # msg, video_id=None, *args, only_once=False, **kwargs
+    def report_warning(self, msg, *args, **kwargs):
+        if len(args) > 0:
+            video_id = args[0]
+            args = args[1:]
+        else:
+            video_id = kwargs.pop('video_id', None)
         idstr = '' if video_id is None else '%s: ' % video_id
         self._downloader.report_warning(
-            '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+            self.__ie_msg(idstr, msg), *args, **kwargs)
 
     def to_screen(self, msg):
         """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+        self._downloader.to_screen(self.__ie_msg(msg))
+
+    def write_debug(self, msg, only_once=False, _cache=[]):
+        '''Log debug message or Print message to stderr'''
+        if not self.get_param('verbose', False):
+            return
+        message = '[debug] ' + self.__ie_msg(msg)
+        logger = self.get_param('logger')
+        if logger:
+            logger.debug(message)
+        else:
+            if only_once and hash(message) in _cache:
+                return
+            self._downloader.to_stderr(message)
+            _cache.append(hash(message))
+
+    # name, default=None, *args, **kwargs
+    def get_param(self, name, *args, **kwargs):
+        default, args = (args[0], args[1:]) if len(args) > 0 else (kwargs.pop('default', None), args)
+        if self._downloader:
+            return self._downloader.params.get(name, default, *args, **kwargs)
+        return default
+
+    def report_drm(self, video_id):
+        self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
 
     def report_extraction(self, id_or_name):
         """Report information extraction."""
@@ -977,6 +1018,15 @@ class InfoExtractor(object):
     def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
         raise GeoRestrictedError(msg, countries=countries)
 
+    def raise_no_formats(self, msg, expected=False, video_id=None):
+        if expected and (
+                self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
+            self.report_warning(msg, video_id)
+        elif isinstance(msg, ExtractorError):
+            raise msg
+        else:
+            raise ExtractorError(msg, expected=expected, video_id=video_id)
+
     # Methods for following #608
     @staticmethod
     def url_result(url, ie=None, video_id=None, video_title=None):

From f8b0135850f39609f72002f5426883859579fc51 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:34:21 +0000
Subject: [PATCH 301/312] [YouTube] Rework n-sig processing, realigning with
 yt-dlp * apply n-sig before chunked fragments, fixes #32692

---
 youtube_dl/extractor/youtube.py | 488 +++++++++++++++++++-------------
 1 file changed, 296 insertions(+), 192 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3bf483c1c..cd4b3ef60 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2,6 +2,7 @@
 
 from __future__ import unicode_literals
 
+import collections
 import itertools
 import json
 import os.path
@@ -23,10 +24,10 @@ from ..compat import (
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
-    ExtractorError,
     clean_html,
     dict_get,
     error_to_compat_str,
+    ExtractorError,
     float_or_none,
     extract_attributes,
     get_element_by_attribute,
@@ -36,6 +37,7 @@ from ..utils import (
     LazyList,
     merge_dicts,
     mimetype2ext,
+    NO_DEFAULT,
     parse_codecs,
     parse_duration,
     parse_qs,
@@ -45,6 +47,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     traverse_obj,
+    try_call,
     try_get,
     txt_or_none,
     unescapeHTML,
@@ -1460,6 +1463,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         self._code_cache = {}
         self._player_cache = {}
 
+    # *ytcfgs, webpage=None
+    def _extract_player_url(self, *ytcfgs, **kw_webpage):
+        if ytcfgs and not isinstance(ytcfgs[0], dict):
+            webpage = kw_webpage.get('webpage') or ytcfgs[0]
+        if webpage:
+            player_url = self._search_regex(
+                r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
+                webpage or '', 'player URL', fatal=False)
+            if player_url:
+                ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
+        return traverse_obj(
+            ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
+            get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
+
+    def _download_player_url(self, video_id, fatal=False):
+        res = self._download_webpage(
+            'https://www.youtube.com/iframe_api',
+            note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
+        player_version = self._search_regex(
+            r'player\\?/([0-9a-fA-F]{8})\\?/', res or '', 'player version', fatal=fatal,
+            default=NO_DEFAULT if res else None)
+        if player_version:
+            return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version)
+
     def _signature_cache_id(self, example_sig):
         """ Return a string representation of a signature """
         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@@ -1474,46 +1501,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot identify player %r' % player_url)
         return id_m.group('id')
 
-    def _get_player_code(self, video_id, player_url, player_id=None):
+    def _load_player(self, video_id, player_url, fatal=True, player_id=None):
         if not player_id:
             player_id = self._extract_player_info(player_url)
-
         if player_id not in self._code_cache:
-            self._code_cache[player_id] = self._download_webpage(
-                player_url, video_id,
+            code = self._download_webpage(
+                player_url, video_id, fatal=fatal,
                 note='Downloading player ' + player_id,
                 errnote='Download of %s failed' % player_url)
-        return self._code_cache[player_id]
+            if code:
+                self._code_cache[player_id] = code
+        return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         player_id = self._extract_player_info(player_url)
 
         # Read from filesystem cache
-        func_id = 'js_%s_%s' % (
+        func_id = 'js_{0}_{1}'.format(
             player_id, self._signature_cache_id(example_sig))
         assert os.path.basename(func_id) == func_id
 
-        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
-        if cache_spec is not None:
-            return lambda s: ''.join(s[i] for i in cache_spec)
+        self.write_debug('Extracting signature function {0}'.format(func_id))
+        cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
 
-        code = self._get_player_code(video_id, player_url, player_id)
-        res = self._parse_sig_js(code)
+        if not cache_spec:
+            code = self._load_player(video_id, player_url, player_id)
+        if code:
+            res = self._parse_sig_js(code)
+            test_string = ''.join(map(compat_chr, range(len(example_sig))))
+            cache_spec = [ord(c) for c in res(test_string)]
+            self.cache.store('youtube-sigfuncs', func_id, cache_spec)
 
-        test_string = ''.join(map(compat_chr, range(len(example_sig))))
-        cache_res = res(test_string)
-        cache_spec = [ord(c) for c in cache_res]
-
-        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
-        return res
+        return lambda s: ''.join(s[i] for i in cache_spec)
 
     def _print_sig_code(self, func, example_sig):
+        if not self.get_param('youtube_print_sig_code'):
+            return
+
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = '' if start == 0 else str(start)
                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                 steps = '' if step == 1 else (':%d' % step)
-                return 's[%s%s%s]' % (starts, ends, steps)
+                return 's[{0}{1}{2}]'.format(starts, ends, steps)
 
             step = None
             # Quelch pyflakes warnings - start will be set when step is set
@@ -1564,143 +1594,137 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
-
         initial_function = jsi.extract_function(funcname)
-
         return lambda s: initial_function([s])
 
+    def _cached(self, func, *cache_id):
+        def inner(*args, **kwargs):
+            if cache_id not in self._player_cache:
+                try:
+                    self._player_cache[cache_id] = func(*args, **kwargs)
+                except ExtractorError as e:
+                    self._player_cache[cache_id] = e
+                except Exception as e:
+                    self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
+
+            ret = self._player_cache[cache_id]
+            if isinstance(ret, Exception):
+                raise ret
+            return ret
+        return inner
+
     def _decrypt_signature(self, s, video_id, player_url):
         """Turn the encrypted s field into a working signature"""
-
-        if player_url is None:
-            raise ExtractorError('Cannot decrypt signature without player_url')
-
-        try:
-            player_id = (player_url, self._signature_cache_id(s))
-            if player_id not in self._player_cache:
-                func = self._extract_signature_function(
-                    video_id, player_url, s
-                )
-                self._player_cache[player_id] = func
-            func = self._player_cache[player_id]
-            if self._downloader.params.get('youtube_print_sig_code'):
-                self._print_sig_code(func, s)
-            return func(s)
-        except Exception as e:
-            tb = traceback.format_exc()
-            raise ExtractorError(
-                'Signature extraction failed: ' + tb, cause=e)
-
-    def _extract_player_url(self, webpage):
-        player_url = self._search_regex(
-            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
-            webpage or '', 'player URL', fatal=False)
-        if not player_url:
-            return
-        if player_url.startswith('//'):
-            player_url = 'https:' + player_url
-        elif not re.match(r'https?://', player_url):
-            player_url = compat_urllib_parse.urljoin(
-                'https://www.youtube.com', player_url)
-        return player_url
+        extract_sig = self._cached(
+            self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
+        func = extract_sig(video_id, player_url, s)
+        self._print_sig_code(func, s)
+        return func(s)
 
     # from yt-dlp
     # See also:
     # 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
     # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
     # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
-    def _extract_n_function_name(self, jscode):
-        target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
-        nfunc_and_idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
-            jscode, 'Initial JS player n function name')
-        nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
-        if not idx:
-            return nfunc
-
-        VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
-        note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
-
-        def search_function_code(needle, group):
-            return self._search_regex(
-                VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
-                note.format(group), group=group)
-
-        if int_or_none(idx) == 0:
-            real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
-            if real_nfunc:
-                return real_nfunc
-        return self._parse_json(
-            search_function_code('.+?', group='name'),
-            nfunc, transform_source=js_to_json)[int(idx)]
-
-    def _extract_n_function(self, video_id, player_url):
-        player_id = self._extract_player_info(player_url)
-        func_code = self._downloader.cache.load('youtube-nsig', player_id)
-
-        if func_code:
-            jsi = JSInterpreter(func_code)
-        else:
-            jscode = self._get_player_code(video_id, player_url, player_id)
-            funcname = self._extract_n_function_name(jscode)
-            jsi = JSInterpreter(jscode)
-            func_code = jsi.extract_function_code(funcname)
-            self._downloader.cache.store('youtube-nsig', player_id, func_code)
-
-        if self._downloader.params.get('youtube_print_sig_code'):
-            self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))
-
-        return lambda s: jsi.extract_function_from_code(*func_code)([s])
-
-    def _n_descramble(self, n_param, player_url, video_id):
-        """Compute the response to YT's "n" parameter challenge,
-           or None
-
-        Args:
-        n_param     -- challenge string that is the value of the
-                       URL's "n" query parameter
-        player_url  -- URL of YT player JS
-        video_id
-        """
-
-        sig_id = ('nsig_value', n_param)
-        if sig_id in self._player_cache:
-            return self._player_cache[sig_id]
+    def _decrypt_nsig(self, n, video_id, player_url):
+        """Turn the encrypted n field into a working signature"""
+        if player_url is None:
+            raise ExtractorError('Cannot decrypt nsig without player_url')
 
         try:
-            player_id = ('nsig', player_url)
-            if player_id not in self._player_cache:
-                self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
-            func = self._player_cache[player_id]
-            ret = func(n_param)
-            if ret.startswith('enhanced_except_'):
-                raise ExtractorError('Unhandled exception in decode')
-            self._player_cache[sig_id] = ret
-            if self._downloader.params.get('verbose', False):
-                self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
-            return self._player_cache[sig_id]
-        except Exception as e:
-            self._downloader.report_warning(
-                '[%s] %s (%s %s)' % (
-                    self.IE_NAME,
-                    'Unable to decode n-parameter: download likely to be throttled',
+            jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as e:
+            raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
+        if self.get_param('youtube_print_sig_code'):
+            self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
+                player_id, func_code[1]))
+
+        try:
+            extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+            ret = extract_nsig(jsi, func_code)(n)
+        except JSInterpreter.Exception as e:
+            self.report_warning(
+                '%s (%s %s)' % (
+                    self.__ie_msg(
+                        'Unable to decode n-parameter: download likely to be throttled'),
                     error_to_compat_str(e),
                     traceback.format_exc()))
+            return
+
+        self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
+        return ret
+
+    def _extract_n_function_name(self, jscode):
+        func_name, idx = self._search_regex(
+            r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
+            jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+        if not idx:
+            return func_name
+
+        return self._parse_json(self._search_regex(
+            r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
+            'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
+            func_name, transform_source=js_to_json)[int(idx)]
+
+    def _extract_n_function_code(self, video_id, player_url):
+        player_id = self._extract_player_info(player_url)
+        func_code = self.cache.load('youtube-nsig', player_id)
+        jscode = func_code or self._load_player(video_id, player_url)
+        jsi = JSInterpreter(jscode)
+
+        if func_code:
+            return jsi, player_id, func_code
+
+        func_name = self._extract_n_function_name(jscode)
+
+        # For redundancy
+        func_code = self._search_regex(
+            r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+                     # NB: The end of the regex is intentionally kept strict
+                     {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+            jscode, 'nsig function', group=('var', 'code'), default=None)
+        if func_code:
+            func_code = ([func_code[0]], func_code[1])
+        else:
+            self.write_debug('Extracting nsig function with jsinterp')
+            func_code = jsi.extract_function_code(func_name)
+
+        self.cache.store('youtube-nsig', player_id, func_code)
+        return jsi, player_id, func_code
+
+    def _extract_n_function_from_code(self, jsi, func_code):
+        func = jsi.extract_function_from_code(*func_code)
+
+        def extract_nsig(s):
+            try:
+                ret = func([s])
+            except JSInterpreter.Exception:
+                raise
+            except Exception as e:
+                raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
+            if ret.startswith('enhanced_except_'):
+                raise JSInterpreter.Exception('Signature function returned an exception')
+            return ret
+
+        return extract_nsig
+
+    def _unthrottle_format_urls(self, video_id, player_url, *formats):
+
+        def decrypt_nsig(n):
+            return self._cached(self._decrypt_nsig, 'nsig', n, player_url)
 
-    def _unthrottle_format_urls(self, video_id, player_url, formats):
         for fmt in formats:
             parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
             n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
             if not n_param:
                 continue
             n_param = n_param[-1]
-            n_response = self._n_descramble(n_param, player_url, video_id)
+            n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
             if n_response is None:
                 # give up if descrambling failed
                 break
-            for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
-                fmt_dct['url'] = update_url(
-                    fmt_dct['url'], query_update={'n': [n_response]})
+            fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -1708,16 +1732,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         Extract signatureTimestamp (sts)
         Required to tell API what sig/player version is in use.
         """
-        sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
+        sts = traverse_obj(ytcfg, 'STS', expected_type=int)
         if not sts:
             # Attempt to extract from player
             if player_url is None:
                 error_msg = 'Cannot extract signature timestamp without player_url.'
                 if fatal:
                     raise ExtractorError(error_msg)
-                self._downloader.report_warning(error_msg)
+                self.report_warning(error_msg)
                 return
-            code = self._get_player_code(video_id, player_url)
+            code = self._load_player(video_id, player_url, fatal=fatal)
             sts = int_or_none(self._search_regex(
                 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
                 'JS player signature timestamp', group='sts', fatal=fatal))
@@ -1733,12 +1757,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # cpn generation algorithm is reverse engineered from base.js.
         # In fact it works even with dummy cpn.
         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
-        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
 
-        playback_url = update_url(
-            playback_url, query_update={
-                'ver': ['2'],
-                'cpn': [cpn],
+        # more consistent results setting it to right before the end
+        qs = parse_qs(playback_url)
+        video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1)
+
+        playback_url = update_url_query(
+            playback_url, {
+                'ver': '2',
+                'cpn': cpn,
+                'cmt': video_length,
+                'el': 'detailpage',  # otherwise defaults to "shorts"
             })
 
         self._download_webpage(
@@ -1986,8 +2016,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             else:
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
+        if not player_url:
+            player_url = self._extract_player_url(webpage)
+
         formats = []
-        itags = []
+        itags = collections.defaultdict(set)
         itag_qualities = {}
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
         CHUNK_SIZE = 10 << 20
@@ -2003,58 +2036,92 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 })
             } for range_start in range(0, f['filesize'], CHUNK_SIZE))
 
+        lower = lambda s: s.lower()
+
         for fmt in streaming_formats:
-            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+            if fmt.get('targetDurationSec'):
                 continue
 
             itag = str_or_none(fmt.get('itag'))
-            quality = fmt.get('quality')
-            if itag and quality:
+            audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}
+
+            quality = traverse_obj(fmt, ((
+                # The 3gp format (17) in android client has a quality of "small",
+                # but is actually worse than other formats
+                T(lambda _: 'tiny' if itag == 17 else None),
+                ('quality', T(lambda q: q if q and q != 'tiny' else None)),
+                ('audioQuality', T(lower)),
+                'quality'), T(txt_or_none)), get_all=False)
+            if quality and itag:
                 itag_qualities[itag] = quality
             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
-            # number of fragment that would subsequently requested with (`&sq=N`)
+            # number of fragments that would subsequently be requested with (`&sq=N`)
             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                 continue
 
             fmt_url = fmt.get('url')
             if not fmt_url:
                 sc = compat_parse_qs(fmt.get('signatureCipher'))
-                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
-                encrypted_sig = try_get(sc, lambda x: x['s'][0])
-                if not (sc and fmt_url and encrypted_sig):
+                fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
+                encrypted_sig = traverse_obj(sc, ('s', -1))
+                if not (fmt_url and encrypted_sig):
                     continue
-                if not player_url:
-                    player_url = self._extract_player_url(webpage)
+                player_url = player_url or self._extract_player_url(webpage)
                 if not player_url:
                     continue
-                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
-                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
-                fmt_url += '&' + sp + '=' + signature
+                try:
+                    fmt_url = update_url_query(fmt_url, {
+                        traverse_obj(sc, ('sp', -1)) or 'signature':
+                            [self._decrypt_signature(encrypted_sig, video_id, player_url)],
+                    })
+                except ExtractorError as e:
+                    self.report_warning('Signature extraction failed: Some formats may be missing',
+                                        video_id=video_id, only_once=True)
+                    self.write_debug(error_to_compat_str(e), only_once=True)
+                    continue
 
-            if itag:
-                itags.append(itag)
-            tbr = float_or_none(
-                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+            language_preference = (
+                10 if audio_track.get('audioIsDefault')
+                else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
+                else -1)
+            name = (
+                traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
+                or quality.replace('audio_quality_', ''))
             dct = {
-                'asr': int_or_none(fmt.get('audioSampleRate')),
-                'filesize': int_or_none(fmt.get('contentLength')),
-                'format_id': itag,
-                'format_note': fmt.get('qualityLabel') or quality,
-                'fps': int_or_none(fmt.get('fps')),
-                'height': int_or_none(fmt.get('height')),
-                'quality': q(quality),
-                'tbr': tbr,
+                'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
                 'url': fmt_url,
-                'width': fmt.get('width'),
+                # Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
+                'source_preference': ((-5 if itag == '22' else -1)
+                                      + (100 if 'Premium' in name else 0)),
+                'quality': q(quality),
+                'language': join_nonempty(audio_track.get('id', '').split('.')[0],
+                                          'desc' if language_preference < -1 else '') or None,
+                'language_preference': language_preference,
+                # Strictly de-prioritize 3gp formats
+                'preference': -2 if itag == '17' else None,
             }
-            mimetype = fmt.get('mimeType')
-            if mimetype:
-                mobj = re.match(
-                    r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
-                if mobj:
-                    dct['ext'] = mimetype2ext(mobj.group(1))
-                    dct.update(parse_codecs(mobj.group(2)))
+            if itag:
+                itags[itag].add(('https', dct.get('language')))
+            self._unthrottle_format_urls(video_id, player_url, dct)
+            dct.update(traverse_obj(fmt, {
+                'asr': ('audioSampleRate', T(int_or_none)),
+                'filesize': ('contentLength', T(int_or_none)),
+                'format_note': ('qualityLabel', T(lambda x: x or quality)),
+                # for some formats, fps is wrongly returned as 1
+                'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
+                'audio_channels': ('audioChannels', T(int_or_none)),
+                'height': ('height', T(int_or_none)),
+                'has_drm': ('drmFamilies', T(bool)),
+                'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
+                'width': ('width', T(int_or_none)),
+                '_duration_ms': ('approxDurationMs', T(int_or_none)),
+            }, get_all=False))
+            mime_mobj = re.match(
+                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
+            if mime_mobj:
+                dct['ext'] = mimetype2ext(mime_mobj.group(1))
+                dct.update(parse_codecs(mime_mobj.group(2)))
             single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
             if single_stream and dct.get('ext'):
                 dct['container'] = dct['ext'] + '_dash'
@@ -2069,32 +2136,62 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
             formats.append(dct)
 
+        def process_manifest_format(f, proto, client_name, itag, all_formats=False):
+            key = (proto, f.get('language'))
+            if not all_formats and key in itags[itag]:
+                return False
+            itags[itag].add(key)
+
+            if itag:
+                f['format_id'] = (
+                    '{0}-{1}'.format(itag, proto)
+                    if all_formats or any(p != proto for p, _ in itags[itag])
+                    else itag)
+
+            if f.get('source_preference') is None:
+                f['source_preference'] = -1
+
+            if itag in ('616', '235'):
+                f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+                f['source_preference'] += 100
+
+            f['quality'] = q(traverse_obj(f, (
+                'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
+            if try_call(lambda: f['fps'] <= 1):
+                del f['fps']
+
+            if proto == 'hls' and f.get('has_drm'):
+                f['has_drm'] = 'maybe'
+                f['source_preference'] -= 5
+            return True
+
         hls_manifest_url = streaming_data.get('hlsManifestUrl')
         if hls_manifest_url:
             for f in self._extract_m3u8_formats(
                     hls_manifest_url, video_id, 'mp4', fatal=False):
-                itag = self._search_regex(
-                    r'/itag/(\d+)', f['url'], 'itag', default=None)
-                if itag:
-                    f['format_id'] = itag
-                formats.append(f)
+                if process_manifest_format(
+                        f, 'hls', None, self._search_regex(
+                            r'/itag/(\d+)', f['url'], 'itag', default=None)):
+                    formats.append(f)
 
         if self._downloader.params.get('youtube_include_dash_manifest', True):
             dash_manifest_url = streaming_data.get('dashManifestUrl')
             if dash_manifest_url:
                 for f in self._extract_mpd_formats(
                         dash_manifest_url, video_id, fatal=False):
-                    itag = f['format_id']
-                    if itag in itags:
-                        continue
-                    if itag in itag_qualities:
-                        f['quality'] = q(itag_qualities[itag])
-                    filesize = int_or_none(self._search_regex(
-                        r'/clen/(\d+)', f.get('fragment_base_url')
-                        or f['url'], 'file size', default=None))
-                    if filesize:
-                        f['filesize'] = filesize
-                    formats.append(f)
+                    if process_manifest_format(
+                            f, 'dash', None, f['format_id']):
+                        f['filesize'] = traverse_obj(f, (
+                            ('fragment_base_url', 'url'), T(lambda u: self._search_regex(
+                                r'/clen/(\d+)', u, 'file size', default=None)),
+                            T(int_or_none)), get_all=False)
+                        formats.append(f)
+
+        playable_formats = [f for f in formats if not f.get('has_drm')]
+        if formats and not playable_formats:
+            # If there are no formats that definitely don't have DRM, all have DRM
+            self.report_drm(video_id)
+        formats[:] = playable_formats
 
         if not formats:
             if streaming_data.get('licenseInfos'):
@@ -2166,6 +2263,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_details.get('lengthSeconds')
             or microformat.get('lengthSeconds')) \
             or parse_duration(search_meta('duration'))
+
+        for f in formats:
+            # Some formats may have much smaller duration than others (possibly damaged during encoding)
+            # but avoid false positives with small duration differences.
+            # Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+            if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
+                self.report_warning(
+                    '{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
+                # Strictly de-prioritize damaged formats
+                f['preference'] = -10
+
         is_live = video_details.get('isLive')
 
         owner_profile_url = self._yt_urljoin(self._extract_author_var(
@@ -2174,10 +2282,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         uploader = self._extract_author_var(
             webpage, 'name', videodetails=video_details, metadata=microformat)
 
-        if not player_url:
-            player_url = self._extract_player_url(webpage)
-        self._unthrottle_format_urls(video_id, player_url, formats)
-
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,

From dc512e3a8a26a8e3fc7f1f67e5ee5e7699db8659 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 15 Jan 2024 18:38:43 +0000
Subject: [PATCH 302/312] [YouTube] Fix `like_count` extraction using
 `likeButtonViewModel` * also fix various tests * TODO: check against yt-dlp
 tests

---
 test/test_subtitles.py          |  1 +
 youtube_dl/extractor/youtube.py | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 1197721ff..e005c78fc 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -295,6 +295,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
+        self.DL.params['format'] = 'best/bestvideo'
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
         self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index cd4b3ef60..db840fc45 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -39,6 +39,7 @@ from ..utils import (
     mimetype2ext,
     NO_DEFAULT,
     parse_codecs,
+    parse_count,
     parse_duration,
     parse_qs,
     qualities,
@@ -46,6 +47,7 @@ from ..utils import (
     smuggle_url,
     str_or_none,
     str_to_int,
+    T,
     traverse_obj,
     try_call,
     try_get,
@@ -1250,7 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'title': 'IMG 3456',
                 'description': '',
                 'upload_date': '20170613',
-                'uploader': 'ElevageOrVert',
+                'uploader': "l'Or Vert asbl",
                 'uploader_id': '@ElevageOrVert',
             },
             'params': {
@@ -2474,6 +2476,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             'like_count': str_to_int(like_count),
                             'dislike_count': str_to_int(dislike_count),
                         })
+                    else:
+                        info['like_count'] = traverse_obj(vpir, (
+                            'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
+                            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
+                            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
+                            'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
+                            get_all=False)
+
                 vsir = content.get('videoSecondaryInfoRenderer')
                 if vsir:
                     rows = try_get(
@@ -2588,7 +2598,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Igor Kleiner - Playlists',
+            'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
             'uploader': 'Igor Kleiner',
             'uploader_id': '@IgorDataScience',
@@ -2599,7 +2609,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'playlist_mincount': 94,
         'info_dict': {
             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
-            'title': 'Igor Kleiner - Playlists',
+            'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
             'uploader': 'Igor Kleiner',
             'uploader_id': '@IgorDataScience',
@@ -2711,7 +2721,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
         'info_dict': {
             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-            'title': 'lex will - Channels',
+            'title': r're:lex will - (?:Home|Channels)',
             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
             'uploader': 'lex will',
             'uploader_id': '@lexwill718',

From c58b655a9ef255eb9d02b4d57706c46cfdf35975 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:29:25 +0000
Subject: [PATCH 303/312] [InfoExtractor] Support DASH subtitle extraction
 (yt-dlp back-port)

---
 test/test_InfoExtractor.py     | 113 +++++++++++-
 youtube_dl/extractor/common.py | 312 ++++++++++++++++++++-------------
 2 files changed, 300 insertions(+), 125 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 3f96645de..043b62243 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -993,7 +993,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 5997.485,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/pull/14844
                 'urls_only',
@@ -1076,7 +1077,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 4400,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/issues/20346
                 # Media considered unfragmented even though it contains
@@ -1122,18 +1124,119 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 360,
                     'height': 360,
                     'fps': 30,
-                }]
+                }],
+                {},
+            ), (
+                'subtitles',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+                [{
+                    'format_id': 'audio=128001',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'm4a',
+                    'tbr': 128.001,
+                    'asr': 48000,
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'vcodec': 'none',
+                    'acodec': 'mp4a.40.2',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=100000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 336,
+                    'height': 144,
+                    'tbr': 100,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=326000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 562,
+                    'height': 240,
+                    'tbr': 326,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=698000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 844,
+                    'height': 360,
+                    'tbr': 698,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=1493000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 1126,
+                    'height': 480,
+                    'tbr': 1493,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }, {
+                    'format_id': 'video=4482000',
+                    'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'ext': 'mp4',
+                    'width': 1688,
+                    'height': 720,
+                    'tbr': 4482,
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'vcodec': 'avc1.4D401F',
+                    'acodec': 'none',
+                    'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                    'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                    'protocol': 'http_dash_segments',
+                }],
+                {
+                    'en': [
+                        {
+                            'ext': 'mp4',
+                            'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                            'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                            'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+                            'protocol': 'http_dash_segments',
+                        }
+                    ]
+                },
             )
         ]
 
-        for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+        for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
             with open('./test/testdata/mpd/%s.mpd' % mpd_file,
                       mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_mpd_formats(
+                formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
+                expect_value(self, subtitles, expected_subtitles, None)
 
     def test_parse_f4m_formats(self):
         _TEST_CASES = [
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index d33557135..ed55d3e07 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import collections
 import datetime
 import functools
 import hashlib
@@ -58,6 +59,7 @@ from ..utils import (
     GeoRestrictedError,
     GeoUtils,
     int_or_none,
+    join_nonempty,
     js_to_json,
     JSON_LD_RE,
     mimetype2ext,
@@ -74,6 +76,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    T,
     traverse_obj,
     try_get,
     unescapeHTML,
@@ -1751,6 +1754,12 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }
 
+    def _report_ignoring_subs(self, name):
+        self.report_warning(bug_reports_message(
+            'Ignoring subtitle tracks found in the {0} manifest; '
+            'if any subtitle tracks are missing,'.format(name)
+        ), only_once=True)
+
     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
@@ -2191,23 +2200,46 @@ class InfoExtractor(object):
             })
         return entries
 
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self._report_ignoring_subs('DASH')
+        return fmts
+
+    def _extract_mpd_formats_and_subtitles(
+            self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+            fatal=True, data=None, headers=None, query=None):
+
+        # TODO: or not? param not yet implemented
+        if self.get_param('ignore_no_formats_error'):
+            fatal = False
+
         res = self._download_xml_handle(
             mpd_url, video_id,
-            note=note or 'Downloading MPD manifest',
-            errnote=errnote or 'Failed to download MPD manifest',
-            fatal=fatal, data=data, headers=headers, query=query)
+            note='Downloading MPD manifest' if note is None else note,
+            errnote='Failed to download MPD manifest' if errnote is None else errnote,
+            fatal=fatal, data=data, headers=headers or {}, query=query or {})
         if res is False:
-            return []
+            return [], {}
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return []
-        mpd_base_url = base_url(urlh.geturl())
+            return [], {}
 
-        return self._parse_mpd_formats(
+        # We could have been redirected to a new url when we retrieved our mpd file.
+        mpd_url = urlh.geturl()
+        mpd_base_url = base_url(mpd_url)
+
+        return self._parse_mpd_formats_and_subtitles(
             mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats(self, *args, **kwargs):
+        fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self._report_ignoring_subs('DASH')
+        return fmts
+
+    def _parse_mpd_formats_and_subtitles(
+            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2215,8 +2247,10 @@ class InfoExtractor(object):
             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
         """
-        if mpd_doc.get('type') == 'dynamic':
-            return []
+        # TODO: param not yet implemented: default like previous yt-dl logic
+        if not self.get_param('dynamic_mpd', False):
+            if mpd_doc.get('type') == 'dynamic':
+                return [], {}
 
         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
 
@@ -2228,6 +2262,7 @@ class InfoExtractor(object):
 
         def extract_multisegment_info(element, ms_parent_info):
             ms_info = ms_parent_info.copy()
+            base_url = ms_info.get('base_url')
 
             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
             # common attributes and elements.  We will only extract relevant
@@ -2285,7 +2320,8 @@ class InfoExtractor(object):
             return ms_info
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats = []
+        formats, subtitles = [], {}
+        stream_numbers = collections.defaultdict(int)
         for period in mpd_doc.findall(_add_ns('Period')):
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
@@ -2295,7 +2331,7 @@ class InfoExtractor(object):
             for adaptation_set in period.findall(_add_ns('AdaptationSet')):
                 if is_drm_protected(adaptation_set):
                     continue
-                adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+                adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
                 for representation in adaptation_set.findall(_add_ns('Representation')):
                     if is_drm_protected(representation):
                         continue
@@ -2303,27 +2339,35 @@ class InfoExtractor(object):
                     representation_attrib.update(representation.attrib)
                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                     mime_type = representation_attrib['mimeType']
-                    content_type = mime_type.split('/')[0]
-                    if content_type == 'text':
-                        # TODO implement WebVTT downloading
-                        pass
-                    elif content_type in ('video', 'audio'):
-                        base_url = ''
-                        for element in (representation, adaptation_set, period, mpd_doc):
-                            base_url_e = element.find(_add_ns('BaseURL'))
-                            if base_url_e is not None:
-                                base_url = base_url_e.text + base_url
-                                if re.match(r'^https?://', base_url):
-                                    break
-                        if mpd_base_url and not re.match(r'^https?://', base_url):
-                            if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
-                                mpd_base_url += '/'
-                            base_url = mpd_base_url + base_url
-                        representation_id = representation_attrib.get('id')
-                        lang = representation_attrib.get('lang')
-                        url_el = representation.find(_add_ns('BaseURL'))
-                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
-                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                    content_type = representation_attrib.get('contentType') or mime_type.split('/')[0]
+                    codec_str = representation_attrib.get('codecs', '')
+                    # Some kind of binary subtitle found in some youtube livestreams
+                    if mime_type == 'application/x-rawcc':
+                        codecs = {'scodec': codec_str}
+                    else:
+                        codecs = parse_codecs(codec_str)
+                    if content_type not in ('video', 'audio', 'text'):
+                        if mime_type == 'image/jpeg':
+                            content_type = mime_type
+                        elif codecs.get('vcodec', 'none') != 'none':
+                            content_type = 'video'
+                        elif codecs.get('acodec', 'none') != 'none':
+                            content_type = 'audio'
+                        elif codecs.get('scodec', 'none') != 'none':
+                            content_type = 'text'
+                        elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
+                            content_type = 'text'
+                        else:
+                            self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+                            continue
+
+                    representation_id = representation_attrib.get('id')
+                    lang = representation_attrib.get('lang')
+                    url_el = representation.find(_add_ns('BaseURL'))
+                    filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+                    bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+                    format_id = join_nonempty(representation_id or content_type, mpd_id)
+                    if content_type in ('video', 'audio'):
                         f = {
                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                             'manifest_url': mpd_url,
@@ -2338,104 +2382,125 @@ class InfoExtractor(object):
                             'filesize': filesize,
                             'container': mimetype2ext(mime_type) + '_dash',
                         }
-                        f.update(parse_codecs(representation_attrib.get('codecs')))
-                        representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+                        f.update(codecs)
+                    elif content_type == 'text':
+                        f = {
+                            'ext': mimetype2ext(mime_type),
+                            'manifest_url': mpd_url,
+                            'filesize': filesize,
+                        }
+                    elif content_type == 'image/jpeg':
+                        # See test case in VikiIE
+                        # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
+                        f = {
+                            'format_id': format_id,
+                            'ext': 'mhtml',
+                            'manifest_url': mpd_url,
+                            'format_note': 'DASH storyboards (jpeg)',
+                            'acodec': 'none',
+                            'vcodec': 'none',
+                        }
+                    if is_drm_protected(adaptation_set) or is_drm_protected(representation):
+                        f['has_drm'] = True
+                    representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info)
 
-                        def prepare_template(template_name, identifiers):
-                            tmpl = representation_ms_info[template_name]
-                            # First of, % characters outside $...$ templates
-                            # must be escaped by doubling for proper processing
-                            # by % operator string formatting used further (see
-                            # https://github.com/ytdl-org/youtube-dl/issues/16867).
-                            t = ''
-                            in_template = False
-                            for c in tmpl:
+                    def prepare_template(template_name, identifiers):
+                        tmpl = representation_ms_info[template_name]
+                        # First of, % characters outside $...$ templates
+                        # must be escaped by doubling for proper processing
+                        # by % operator string formatting used further (see
+                        # https://github.com/ytdl-org/youtube-dl/issues/16867).
+                        t = ''
+                        in_template = False
+                        for c in tmpl:
+                            t += c
+                            if c == '$':
+                                in_template = not in_template
+                            elif c == '%' and not in_template:
                                 t += c
-                                if c == '$':
-                                    in_template = not in_template
-                                elif c == '%' and not in_template:
-                                    t += c
-                            # Next, $...$ templates are translated to their
-                            # %(...) counterparts to be used with % operator
-                            t = t.replace('$RepresentationID$', representation_id)
-                            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
-                            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
-                            t.replace('$$', '$')
-                            return t
+                        # Next, $...$ templates are translated to their
+                        # %(...) counterparts to be used with % operator
+                        t = t.replace('$RepresentationID$', representation_id)
+                        t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+                        t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                        t.replace('$$', '$')
+                        return t
 
-                        # @initialization is a regular template like @media one
-                        # so it should be handled just the same way (see
-                        # https://github.com/ytdl-org/youtube-dl/issues/11605)
-                        if 'initialization' in representation_ms_info:
-                            initialization_template = prepare_template(
-                                'initialization',
-                                # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
-                                # $Time$ shall not be included for @initialization thus
-                                # only $Bandwidth$ remains
-                                ('Bandwidth', ))
-                            representation_ms_info['initialization_url'] = initialization_template % {
-                                'Bandwidth': bandwidth,
-                            }
+                    # @initialization is a regular template like @media one
+                    # so it should be handled just the same way (see
+                    # https://github.com/ytdl-org/youtube-dl/issues/11605)
+                    if 'initialization' in representation_ms_info:
+                        initialization_template = prepare_template(
+                            'initialization',
+                            # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+                            # $Time$ shall not be included for @initialization thus
+                            # only $Bandwidth$ remains
+                            ('Bandwidth', ))
+                        representation_ms_info['initialization_url'] = initialization_template % {
+                            'Bandwidth': bandwidth,
+                        }
 
-                        def location_key(location):
-                            return 'url' if re.match(r'^https?://', location) else 'path'
+                    def location_key(location):
+                        return 'url' if re.match(r'^https?://', location) else 'path'
 
-                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+                    if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
-                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
-                            media_location_key = location_key(media_template)
+                        media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                        media_location_key = location_key(media_template)
 
-                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
-                            # can't be used at the same time
-                            if '%(Number' in media_template and 's' not in representation_ms_info:
-                                segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
-                                    segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
-                                    representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
-                                representation_ms_info['fragments'] = [{
-                                    media_location_key: media_template % {
-                                        'Number': segment_number,
-                                        'Bandwidth': bandwidth,
-                                    },
-                                    'duration': segment_duration,
-                                } for segment_number in range(
-                                    representation_ms_info['start_number'],
-                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])]
-                            else:
-                                # $Number*$ or $Time$ in media template with S list available
-                                # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
-                                # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
-                                representation_ms_info['fragments'] = []
-                                segment_time = 0
-                                segment_d = None
-                                segment_number = representation_ms_info['start_number']
+                        # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+                        # can't be used at the same time
+                        if '%(Number' in media_template and 's' not in representation_ms_info:
+                            segment_duration = None
+                            if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+                                segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+                                representation_ms_info['total_number'] = int(math.ceil(
+                                    float_or_none(period_duration, segment_duration, default=0)))
+                            representation_ms_info['fragments'] = [{
+                                media_location_key: media_template % {
+                                    'Number': segment_number,
+                                    'Bandwidth': bandwidth,
+                                },
+                                'duration': segment_duration,
+                            } for segment_number in range(
+                                representation_ms_info['start_number'],
+                                representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+                        else:
+                            # $Number*$ or $Time$ in media template with S list available
+                            # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+                            # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+                            representation_ms_info['fragments'] = []
+                            segment_time = 0
+                            segment_d = None
+                            segment_number = representation_ms_info['start_number']
 
-                                def add_segment_url():
-                                    segment_url = media_template % {
-                                        'Time': segment_time,
-                                        'Bandwidth': bandwidth,
-                                        'Number': segment_number,
-                                    }
-                                    representation_ms_info['fragments'].append({
-                                        media_location_key: segment_url,
-                                        'duration': float_or_none(segment_d, representation_ms_info['timescale']),
-                                    })
+                            def add_segment_url():
+                                segment_url = media_template % {
+                                    'Time': segment_time,
+                                    'Bandwidth': bandwidth,
+                                    'Number': segment_number,
+                                }
+                                representation_ms_info['fragments'].append({
+                                    media_location_key: segment_url,
+                                    'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+                                })
 
-                                for num, s in enumerate(representation_ms_info['s']):
-                                    segment_time = s.get('t') or segment_time
-                                    segment_d = s['d']
+                            for num, s in enumerate(representation_ms_info['s']):
+                                segment_time = s.get('t') or segment_time
+                                segment_d = s['d']
+                                add_segment_url()
+                                segment_number += 1
+                                for r in range(s.get('r', 0)):
+                                    segment_time += segment_d
                                     add_segment_url()
                                     segment_number += 1
-                                    for r in range(s.get('r', 0)):
-                                        segment_time += segment_d
-                                        add_segment_url()
-                                        segment_number += 1
-                                    segment_time += segment_d
-                        elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+                                segment_time += segment_d
+                    elif 'segment_urls' in representation_ms_info:
+                        fragments = []
+                        if 's' in representation_ms_info:
                             # No media template
                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
                             # or any YouTube dashsegments video
-                            fragments = []
                             segment_index = 0
                             timescale = representation_ms_info['timescale']
                             for s in representation_ms_info['s']:
@@ -2487,8 +2552,15 @@ class InfoExtractor(object):
                             f['url'] = base_url
                         formats.append(f)
                     else:
-                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        return formats
+                        # Assuming direct URL to unfragmented media.
+                        f['url'] = representation_ms_info['base_url']
+                    if content_type in ('video', 'audio', 'image/jpeg'):
+                        f['manifest_stream_number'] = stream_numbers[f['url']]
+                        stream_numbers[f['url']] += 1
+                        formats.append(f)
+                    elif content_type == 'text':
+                        subtitles.setdefault(lang or 'und', []).append(f)
+        return formats, subtitles
 
     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(

From bec9180e8904a12c55cfa838b0541879d16bf20f Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 00:07:14 +0000
Subject: [PATCH 304/312] [downloader/dash] Support `range` in fragment (format
 f'{start}-{end}')  * adapted from
 https://github.com/ytdl-org/youtube-dl/pull/30279  * thx former GH user
 kikuyan

---
 youtube_dl/downloader/dash.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 2800d4260..f3c058879 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -35,6 +35,7 @@ class DashSegmentsFD(FragmentFD):
         for frag_index, fragment in enumerate(fragments, 1):
             if frag_index <= ctx['fragment_index']:
                 continue
+            success = False
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
             fatal = frag_index == 1 or not skip_unavailable_fragments
@@ -42,10 +43,14 @@ class DashSegmentsFD(FragmentFD):
             if not fragment_url:
                 assert fragment_base_url
                 fragment_url = urljoin(fragment_base_url, fragment['path'])
-            success = False
+            headers = info_dict.get('http_headers')
+            fragment_range = fragment.get('range')
+            if fragment_range:
+                headers = headers.copy() if headers else {}
+                headers['Range'] = 'bytes=%s' % (fragment_range,)
             for count in itertools.count():
                 try:
-                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers)
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)

From 4eaeb9b2c680ed097770ce976c3b37a1b05c0800 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:37:08 +0000
Subject: [PATCH 305/312] [InfoExtractor] Support byte range for DASH * adapted
 from https://github.com/ytdl-org/youtube-dl/pull/30279 * thx former GH user
 kikuyan

---
 test/test_InfoExtractor.py          |  66 ++++++
 test/testdata/mpd/range_only.mpd    |  35 +++
 test/testdata/mpd/subtitles.mpd     | 351 ++++++++++++++++++++++++++++
 test/testdata/mpd/url_and_range.mpd |  32 +++
 youtube_dl/extractor/common.py      | 114 ++++++---
 5 files changed, 562 insertions(+), 36 deletions(-)
 create mode 100644 test/testdata/mpd/range_only.mpd
 create mode 100644 test/testdata/mpd/subtitles.mpd
 create mode 100644 test/testdata/mpd/url_and_range.mpd

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 043b62243..d55d6ad54 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1126,6 +1126,72 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'fps': 30,
                 }],
                 {},
+            ), (
+                # https://github.com/ytdl-org/youtube-dl/issues/30235
+                # Bento4 generated test mpd
+                # mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
+                'url_and_range',
+                'http://unknown/manifest.mpd',  # mpd_url
+                'http://unknown/',  # mpd_base_url
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/',
+                    'ext': 'm4a',
+                    'format_id': 'audio-und-mp4a.40.2',
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 98.808,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/',
+                    'ext': 'mp4',
+                    'format_id': 'video-avc1',
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4D401E',
+                    'tbr': 699.597,
+                    'width': 768,
+                    'height': 432
+                }],
+                {},
+            ), (
+                # https://github.com/ytdl-org/youtube-dl/issues/27575
+                # GPAC generated test mpd
+                # MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
+                'range_only',
+                'http://unknown/manifest.mpd',  # mpd_url
+                'http://unknown/',  # mpd_base_url
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
+                    'ext': 'm4a',
+                    'format_id': '2',
+                    'format_note': 'DASH audio',
+                    'container': 'm4a_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 98.096,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'fragment_base_url': 'http://unknown/video_dashinit.mp4',
+                    'ext': 'mp4',
+                    'format_id': '1',
+                    'format_note': 'DASH video',
+                    'container': 'mp4_dash',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4D401E',
+                    'tbr': 526.987,
+                    'width': 768,
+                    'height': 432
+                }],
+                {},
             ), (
                 'subtitles',
                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
diff --git a/test/testdata/mpd/range_only.mpd b/test/testdata/mpd/range_only.mpd
new file mode 100644
index 000000000..e0c2152d1
--- /dev/null
+++ b/test/testdata/mpd/range_only.mpd
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<!-- MPD file Generated with GPAC version 1.0.1-revrelease at 2021-11-27T20:53:11.690Z -->
+<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H0M30.196S" maxSegmentDuration="PT0H0M10.027S" profiles="urn:mpeg:dash:profile:full:2011">
+ <ProgramInformation moreInformationURL="http://gpac.io">
+  <Title>manifest.mpd generated by GPAC</Title>
+ </ProgramInformation>
+
+ <Period duration="PT0H0M30.196S">
+  <AdaptationSet segmentAlignment="true" maxWidth="768" maxHeight="432" maxFrameRate="30000/1001" par="16:9" lang="und" startWithSAP="1">
+   <Representation id="1" mimeType="video/mp4" codecs="avc1.4D401E" width="768" height="432" frameRate="30000/1001" sar="1:1" bandwidth="526987">
+    <BaseURL>video_dashinit.mp4</BaseURL>
+    <SegmentList timescale="90000" duration="900000">
+     <Initialization range="0-881"/>
+     <SegmentURL mediaRange="882-876094" indexRange="882-925"/>
+     <SegmentURL mediaRange="876095-1466732" indexRange="876095-876138"/>
+     <SegmentURL mediaRange="1466733-1953615" indexRange="1466733-1466776"/>
+     <SegmentURL mediaRange="1953616-1994211" indexRange="1953616-1953659"/>
+    </SegmentList>
+   </Representation>
+  </AdaptationSet>
+  <AdaptationSet segmentAlignment="true" lang="und" startWithSAP="1">
+   <Representation id="2" mimeType="audio/mp4" codecs="mp4a.40.2" audioSamplingRate="48000" bandwidth="98096">
+    <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+    <BaseURL>audio_dashinit.mp4</BaseURL>
+    <SegmentList timescale="48000" duration="480000">
+     <Initialization range="0-752"/>
+     <SegmentURL mediaRange="753-124129" indexRange="753-796"/>
+     <SegmentURL mediaRange="124130-250544" indexRange="124130-124173"/>
+     <SegmentURL mediaRange="250545-374929" indexRange="250545-250588"/>
+    </SegmentList>
+   </Representation>
+  </AdaptationSet>
+ </Period>
+</MPD>
+
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 000000000..6f948adba
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<MPD
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns="urn:mpeg:dash:schema:mpd:2011"
+  xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
+  type="static"
+  mediaPresentationDuration="PT14M48S"
+  maxSegmentDuration="PT1M"
+  minBufferTime="PT10S"
+  profiles="urn:mpeg:dash:profile:isoff-live:2011">
+  <Period
+    id="1"
+    duration="PT14M48S">
+    <BaseURL>dash/</BaseURL>
+    <AdaptationSet
+      id="1"
+      group="1"
+      contentType="audio"
+      segmentAlignment="true"
+      audioSamplingRate="48000"
+      mimeType="audio/mp4"
+      codecs="mp4a.40.2"
+      startWithSAP="1">
+      <AudioChannelConfiguration
+        schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
+        value="2" />
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="48000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="3584" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="audio=128001"
+        bandwidth="128001">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="2"
+      group="3"
+      contentType="text"
+      lang="en"
+      mimeType="application/mp4"
+      codecs="stpp"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
+      <SegmentTemplate
+        timescale="1000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="60000" r="9" />
+          <S d="24000" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="textstream_eng=1000"
+        bandwidth="1000">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="3"
+      group="2"
+      contentType="video"
+      par="960:409"
+      minBandwidth="100000"
+      maxBandwidth="4482000"
+      maxWidth="1689"
+      maxHeight="720"
+      segmentAlignment="true"
+      mimeType="video/mp4"
+      codecs="avc1.4D401F"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="12288"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="24576" r="443" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="video=100000"
+        bandwidth="100000"
+        width="336"
+        height="144"
+        sar="2880:2863"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=326000"
+        bandwidth="326000"
+        width="562"
+        height="240"
+        sar="115200:114929"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=698000"
+        bandwidth="698000"
+        width="844"
+        height="360"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=1493000"
+        bandwidth="1493000"
+        width="1126"
+        height="480"
+        sar="230400:230267"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=4482000"
+        bandwidth="4482000"
+        width="1688"
+        height="720"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
diff --git a/test/testdata/mpd/url_and_range.mpd b/test/testdata/mpd/url_and_range.mpd
new file mode 100644
index 000000000..b8c68aad2
--- /dev/null
+++ b/test/testdata/mpd/url_and_range.mpd
@@ -0,0 +1,32 @@
+<?xml version="1.0" ?>
+<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT10.01S" mediaPresentationDuration="PT30.097S" type="static">
+  <!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-639 -->
+  <Period>
+    <!-- Video -->
+    <AdaptationSet mimeType="video/mp4" segmentAlignment="true" startWithSAP="1" maxWidth="768" maxHeight="432">
+      <Representation id="video-avc1" codecs="avc1.4D401E" width="768" height="432" scanType="progressive" frameRate="30000/1001" bandwidth="699597">
+        <SegmentList timescale="1000" duration="10010">
+          <Initialization sourceURL="video-frag.mp4" range="36-746"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="747-876117"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="876118-1466913"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="1466914-1953954"/>
+          <SegmentURL media="video-frag.mp4" mediaRange="1953955-1994652"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+    <!-- Audio -->
+    <AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true">
+      <Representation id="audio-und-mp4a.40.2" codecs="mp4a.40.2" bandwidth="98808" audioSamplingRate="48000">
+        <AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
+        <SegmentList timescale="1000" duration="10010">
+          <Initialization sourceURL="audio-frag.mp4" range="32-623"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="624-124199"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="124200-250303"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="250304-374365"/>
+          <SegmentURL media="audio-frag.mp4" mediaRange="374366-374836"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
+
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ed55d3e07..76414554a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -183,6 +183,8 @@ class InfoExtractor(object):
                                             fragment_base_url
                                  * "duration" (optional, int or float)
                                  * "filesize" (optional, int)
+                                 * "range" (optional, str of the form "start-end"
+                                            to use in HTTP Range header)
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
@@ -2296,15 +2298,27 @@ class InfoExtractor(object):
             def extract_Initialization(source):
                 initialization = source.find(_add_ns('Initialization'))
                 if initialization is not None:
-                    ms_info['initialization_url'] = initialization.attrib['sourceURL']
+                    ms_info['initialization_url'] = initialization.get('sourceURL') or base_url
+                    initialization_url_range = initialization.get('range')
+                    if initialization_url_range:
+                        ms_info['initialization_url_range'] = initialization_url_range
 
             segment_list = element.find(_add_ns('SegmentList'))
             if segment_list is not None:
                 extract_common(segment_list)
                 extract_Initialization(segment_list)
                 segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
-                if segment_urls_e:
-                    ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+                segment_urls = traverse_obj(segment_urls_e, (
+                    Ellipsis, T(lambda e: e.attrib), 'media'))
+                if segment_urls:
+                    ms_info['segment_urls'] = segment_urls
+                segment_urls_range = traverse_obj(segment_urls_e, (
+                    Ellipsis, T(lambda e: e.attrib), 'mediaRange',
+                    T(lambda r: re.findall(r'^\d+-\d+$', r)), 0))
+                if segment_urls_range:
+                    ms_info['segment_urls_range'] = segment_urls_range
+                    if not segment_urls:
+                        ms_info['segment_urls'] = [base_url for _ in segment_urls_range]
             else:
                 segment_template = element.find(_add_ns('SegmentTemplate'))
                 if segment_template is not None:
@@ -2443,6 +2457,11 @@ class InfoExtractor(object):
                     def location_key(location):
                         return 'url' if re.match(r'^https?://', location) else 'path'
 
+                    def calc_segment_duration():
+                        return float_or_none(
+                            representation_ms_info['segment_duration'],
+                            representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+
                     if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
                         media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
@@ -2512,45 +2531,68 @@ class InfoExtractor(object):
                                         'duration': duration,
                                     })
                                     segment_index += 1
-                            representation_ms_info['fragments'] = fragments
-                        elif 'segment_urls' in representation_ms_info:
+                        elif 'segment_urls_range' in representation_ms_info:
+                            # Segment URLs with mediaRange
+                            # Example: https://kinescope.io/200615537/master.mpd
+                            # https://github.com/ytdl-org/youtube-dl/issues/30235
+                            # or any mpd generated with Bento4 `mp4dash --no-split --use-segment-list`
+                            segment_duration = calc_segment_duration()
+                            for segment_url, segment_url_range in zip(
+                                    representation_ms_info['segment_urls'], representation_ms_info['segment_urls_range']):
+                                fragments.append({
+                                    location_key(segment_url): segment_url,
+                                    'range': segment_url_range,
+                                    'duration': segment_duration,
+                                })
+                        else:
                             # Segment URLs with no SegmentTimeline
                             # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
                             # https://github.com/ytdl-org/youtube-dl/pull/14844
-                            fragments = []
-                            segment_duration = float_or_none(
-                                representation_ms_info['segment_duration'],
-                                representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+                            segment_duration = calc_segment_duration()
                             for segment_url in representation_ms_info['segment_urls']:
-                                fragment = {
+                                fragments.append({
                                     location_key(segment_url): segment_url,
-                                }
-                                if segment_duration:
-                                    fragment['duration'] = segment_duration
-                                fragments.append(fragment)
-                            representation_ms_info['fragments'] = fragments
-                        # If there is a fragments key available then we correctly recognized fragmented media.
-                        # Otherwise we will assume unfragmented media with direct access. Technically, such
-                        # assumption is not necessarily correct since we may simply have no support for
-                        # some forms of fragmented media renditions yet, but for now we'll use this fallback.
-                        if 'fragments' in representation_ms_info:
-                            f.update({
-                                # NB: mpd_url may be empty when MPD manifest is parsed from a string
-                                'url': mpd_url or base_url,
-                                'fragment_base_url': base_url,
-                                'fragments': [],
-                                'protocol': 'http_dash_segments',
+                                    'duration': segment_duration,
+                                })
+                        representation_ms_info['fragments'] = fragments
+
+                    # If there is a fragments key available then we correctly recognized fragmented media.
+                    # Otherwise we will assume unfragmented media with direct access. Technically, such
+                    # assumption is not necessarily correct since we may simply have no support for
+                    # some forms of fragmented media renditions yet, but for now we'll use this fallback.
+                    if 'fragments' in representation_ms_info:
+                        base_url = representation_ms_info['base_url'] 
+                        f.update({
+                            # NB: mpd_url may be empty when MPD manifest is parsed from a string
+                            'url': mpd_url or base_url,
+                            'fragment_base_url': base_url,
+                            'fragments': [],
+                            'protocol': 'http_dash_segments',
+                        })
+                        if 'initialization_url' in representation_ms_info and 'initialization_url_range' in representation_ms_info:
+                            # Initialization URL with range (accompanied by Segment URLs with mediaRange above)
+                            # https://github.com/ytdl-org/youtube-dl/issues/30235
+                            initialization_url = representation_ms_info['initialization_url']
+                            f['fragments'].append({
+                                location_key(initialization_url): initialization_url,
+                                'range': representation_ms_info['initialization_url_range'],
                             })
-                            if 'initialization_url' in representation_ms_info:
-                                initialization_url = representation_ms_info['initialization_url']
-                                if not f.get('url'):
-                                    f['url'] = initialization_url
-                                f['fragments'].append({location_key(initialization_url): initialization_url})
-                            f['fragments'].extend(representation_ms_info['fragments'])
-                        else:
-                            # Assuming direct URL to unfragmented media.
-                            f['url'] = base_url
-                        formats.append(f)
+                        elif 'initialization_url' in representation_ms_info:
+                            initialization_url = representation_ms_info['initialization_url']
+                            if not f.get('url'):
+                                f['url'] = initialization_url
+                            f['fragments'].append({location_key(initialization_url): initialization_url})
+                        elif 'initialization_url_range' in representation_ms_info:
+                            # no Initialization URL but range (accompanied by no Segment URLs but mediaRange above)
+                            # https://github.com/ytdl-org/youtube-dl/issues/27575
+                            f['fragments'].append({
+                                location_key(base_url): base_url,
+                                'range': representation_ms_info['initialization_url_range'],
+                            })
+                        f['fragments'].extend(representation_ms_info['fragments'])
+                        if not period_duration:
+                            period_duration = sum(traverse_obj(representation_ms_info, (
+                                'fragments', Ellipsis, 'duration', T(float_or_none))))
                     else:
                         # Assuming direct URL to unfragmented media.
                         f['url'] = representation_ms_info['base_url']

From 1fd8f802b836cc1e8bf87b2dbe02decd6a980cb6 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:45:43 +0000
Subject: [PATCH 306/312] [InfoExtractor] Correctly resolve BaseURL in DASH
 manifest Specs: * ISO/IEC 23009-1:2012 section 5.6 * RFC 3986 section 5.

---
 youtube_dl/extractor/common.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 76414554a..0e5dfd8fa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2262,9 +2262,24 @@ class InfoExtractor(object):
         def is_drm_protected(element):
             return element.find(_add_ns('ContentProtection')) is not None
 
+        from ..utils import YoutubeDLHandler
+        fix_path = YoutubeDLHandler._fix_path
+
+        def resolve_base_url(element, parent_base_url=None):
+            # TODO: use native XML traversal when ready
+            b_url = traverse_obj(element, (
+                T(lambda e: e.find(_add_ns('BaseURL')).text)))
+            if parent_base_url and b_url:
+                if not parent_base_url[-1] in ('/', ':'):
+                    parent_base_url += '/'
+                b_url = compat_urlparse.urljoin(parent_base_url, b_url)
+            if b_url:
+                b_url = fix_path(b_url)
+            return b_url or parent_base_url
+
         def extract_multisegment_info(element, ms_parent_info):
             ms_info = ms_parent_info.copy()
-            base_url = ms_info.get('base_url')
+            base_url = ms_info['base_url'] = resolve_base_url(element, ms_info.get('base_url'))
 
             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
             # common attributes and elements.  We will only extract relevant
@@ -2336,11 +2351,13 @@ class InfoExtractor(object):
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
         formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
+        mpd_base_url = resolve_base_url(mpd_doc, mpd_base_url or mpd_url)
         for period in mpd_doc.findall(_add_ns('Period')):
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
                 'timescale': 1,
+                'base_url': mpd_base_url,
             })
             for adaptation_set in period.findall(_add_ns('AdaptationSet')):
                 if is_drm_protected(adaptation_set):
@@ -2561,7 +2578,7 @@ class InfoExtractor(object):
                     # assumption is not necessarily correct since we may simply have no support for
                     # some forms of fragmented media renditions yet, but for now we'll use this fallback.
                     if 'fragments' in representation_ms_info:
-                        base_url = representation_ms_info['base_url'] 
+                        base_url = representation_ms_info['base_url']
                         f.update({
                             # NB: mpd_url may be empty when MPD manifest is parsed from a string
                             'url': mpd_url or base_url,

From bdda6b81df61f52eed2502c8ae624d297d918488 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 15:57:25 +0000
Subject: [PATCH 307/312] [Vbox7IE] Improve extraction * DASH extraction no
 longer fails with new range support * but always find combined formats if
 available * suppress ineffective XFF geo-bypass (causes time-outs) * adapted
 from https://github.com/ytdl-org/youtube-dl/pull/29680 * thx former GH user
 kikuyan

---
 youtube_dl/extractor/vbox7.py | 121 ++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 35 deletions(-)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index 8152acefd..c504c5311 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -2,9 +2,19 @@
 from __future__ import unicode_literals
 
 import re
+import time
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    float_or_none,
+    merge_dicts,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
 
 
 class Vbox7IE(InfoExtractor):
@@ -20,10 +30,12 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
     _GEO_COUNTRIES = ['BG']
+    _GEO_BYPASS = False
     _TESTS = [{
-        'url': 'http://vbox7.com/play:0946fff23c',
-        'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
+        'url': 'https://vbox7.com/play:0946fff23c',
+        'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
             'id': '0946fff23c',
             'ext': 'mp4',
@@ -34,18 +46,21 @@ class Vbox7IE(InfoExtractor):
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
         },
-        'params': {
-            'proxy': '127.0.0.1:8118',
-        },
+        'expected_warnings': [
+            'Unable to download webpage',
+        ],
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': '99f65c0c9ef9b682b97313e052734c3f',
+        'md5': 'aaf19465e37ec0b30b918df83ec32c50',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
             'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'description': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'timestamp': 1360215023,
+            'upload_date': '20130207',
+            'uploader': 'svideteliat_ot_varshava',
         },
-        'skip': 'georestricted',
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -54,52 +69,88 @@ class Vbox7IE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
-            webpage)
+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(cls._EMBED_REGEX[0], webpage)
         if mobj:
             return mobj.group('url')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        url = 'https://vbox7.com/play:%s' % (video_id,)
 
+        now = time.time()
         response = self._download_json(
-            'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
-            video_id)
+            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,),
+            video_id, headers={'Referer': url})
+        # estimate time to which possible `ago` member is relative
+        now = now + 0.5 * (time.time() - now)
 
         if 'error' in response:
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
 
-        video = response['options']
+        video_url = traverse_obj(response, ('options', 'src', T(url_or_none)))
 
-        title = video['title']
-        video_url = video['src']
-
-        if '/na.mp4' in video_url:
+        if '/na.mp4' in video_url or '':
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
-        uploader = video.get('uploader')
+        ext = determine_ext(video_url)
+        if ext == 'mpd':
+            # In case MPD cannot be parsed, or anyway, get mp4 combined
+            # formats usually provided to Safari, iOS, and old Windows
+            try:
+                formats, subtitles = self._extract_mpd_formats_and_subtitles(
+                    video_url, video_id, 'dash', fatal=False)
+            except KeyError:
+                self.report_warning('Failed to parse MPD manifest')
+                formats, subtitles = [], {}
 
-        webpage = self._download_webpage(
-            'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
+            video = response['options']
+            resolutions = (1080, 720, 480, 240, 144)
+            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
+            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
+                if res > highest_res:
+                    continue
+                formats.append({
+                    'url': video_url.replace('.mpd', '_%d.mp4' % res),
+                    'format_id': '%dp' % res,
+                    'height': res,
+                })
+            # if above formats are flaky, enable the line below
+            # self._check_formats(formats, video_id)
+        else:
+            formats = [{
+                'url': video_url,
+            }]
+            subtitles = {}
+        self._sort_formats(formats)
 
-        info = {}
+        webpage = self._download_webpage(url, video_id, fatal=False) or ''
 
-        if webpage:
-            info = self._search_json_ld(
-                webpage.replace('"/*@context"', '"@context"'), video_id,
-                fatal=False)
+        info = self._search_json_ld(
+            webpage.replace('"/*@context"', '"@context"'), video_id,
+            fatal=False) if webpage else {}
 
-        info.update({
+        if not info.get('title'):
+            info['title'] = traverse_obj(response, (
+                'options', 'title', T(txt_or_none))) or self._og_search_title(webpage)
+
+        def if_missing(k):
+            return lambda x: None if k in info else x
+
+        info = merge_dicts(info, {
             'id': video_id,
-            'title': title,
-            'url': video_url,
-            'uploader': uploader,
-            'thumbnail': self._proto_relative_url(
+            'formats': formats,
+            'subtitles': subtitles or None,
+        }, info, traverse_obj(response, ('options', {
+            'uploader': ('uploader', T(txt_or_none)),
+            'timestamp': ('ago', T(if_missing('timestamp')), T(lambda t: int(round((now - t) / 60.0)) * 60)),
+            'duration': ('duration', T(if_missing('duration')), T(float_or_none)),
+        })))
+        if 'thumbnail' not in info:
+            info['thumbnail'] = self._proto_relative_url(
                 info.get('thumbnail') or self._og_search_thumbnail(webpage),
-                'http:'),
-        })
+                'https:'),
+
         return info

From 4416f82c809a81737d68875dcb201e366d58dabd Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 27 Jan 2024 18:17:09 +0000
Subject: [PATCH 308/312] [Vbox7IE] Sanitise ld+json containing unexpected
 characters * based on PR #29680 * added hack to force invoking
 `transform_source` * fixes #26218

---
 youtube_dl/extractor/vbox7.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index c504c5311..d114ecb07 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -5,6 +5,7 @@ import re
 import time
 
 from .common import InfoExtractor
+from ..compat import compat_kwargs
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -75,6 +76,27 @@ class Vbox7IE(InfoExtractor):
         if mobj:
             return mobj.group('url')
 
+    # transform_source=None, fatal=True
+    def _parse_json(self, json_string, video_id, *args, **kwargs):
+        if '"@context"' in json_string[:30]:
+            # this is ld+json, or that's the way to bet
+            transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
+            if not transform_source:
+
+                def fix_chars(src):
+                    # fix malformed ld+json: replace raw CRLFs with escaped LFs
+                    return re.sub(
+                        r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
+
+                if len(args) > 0:
+                    args = (fix_chars,) + args[1:]
+                else:
+                    kwargs['transform_source'] = fix_chars
+                    kwargs = compat_kwargs(kwargs)
+
+        return super(Vbox7IE, self)._parse_json(
+            json_string, video_id, *args, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         url = 'https://vbox7.com/play:%s' % (video_id,)

From 7687389f08a5c7c49e57d1b7f7b11b1c87b47b68 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 18 Feb 2024 20:55:01 +0000
Subject: [PATCH 309/312] [Vbox7] Improve extraction, adding features from
 yt-dlp PR #9100

* changes from https://github.com/yt-dlp/yt-dlp/pull/9100 (thx
seproDev):
  - attempt HLS extraction
  - re-enable XFF
  - test `view_count`, `duration` extraction
* improve commenting, error checks
---
 youtube_dl/extractor/vbox7.py | 80 +++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index d114ecb07..1c0b77074 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -7,6 +7,7 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_kwargs
 from ..utils import (
+    base_url,
     determine_ext,
     ExtractorError,
     float_or_none,
@@ -14,6 +15,7 @@ from ..utils import (
     T,
     traverse_obj,
     txt_or_none,
+    url_basename,
     url_or_none,
 )
 
@@ -33,8 +35,8 @@ class Vbox7IE(InfoExtractor):
                     '''
     _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
     _GEO_COUNTRIES = ['BG']
-    _GEO_BYPASS = False
     _TESTS = [{
+        # the http: URL just redirects here
         'url': 'https://vbox7.com/play:0946fff23c',
         'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
@@ -42,17 +44,19 @@ class Vbox7IE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Борисов: Притеснен съм за бъдещето на България',
             'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
-            'thumbnail': r're:^https?://.*\.jpg$',
             'timestamp': 1470982814,
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'duration': 2640,
         },
         'expected_warnings': [
             'Unable to download webpage',
         ],
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': 'aaf19465e37ec0b30b918df83ec32c50',
+        'md5': '99f65c0c9ef9b682b97313e052734c3f',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
@@ -61,7 +65,11 @@ class Vbox7IE(InfoExtractor):
             'timestamp': 1360215023,
             'upload_date': '20130207',
             'uploader': 'svideteliat_ot_varshava',
+            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
+            'view_count': int,
+            'duration': 83,
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -76,6 +84,9 @@ class Vbox7IE(InfoExtractor):
         if mobj:
             return mobj.group('url')
 
+    # specialisation to transform what looks like ld+json that
+    # may contain invalid character combinations
+
     # transform_source=None, fatal=True
     def _parse_json(self, json_string, video_id, *args, **kwargs):
         if '"@context"' in json_string[:30]:
@@ -103,49 +114,64 @@ class Vbox7IE(InfoExtractor):
 
         now = time.time()
         response = self._download_json(
-            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,),
-            video_id, headers={'Referer': url})
+            'https://www.vbox7.com/aj/player/item/options', video_id,
+            query={'vid': video_id}, headers={'Referer': url})
         # estimate time to which possible `ago` member is relative
         now = now + 0.5 * (time.time() - now)
 
-        if 'error' in response:
+        if traverse_obj(response, 'error'):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
 
-        video_url = traverse_obj(response, ('options', 'src', T(url_or_none)))
+        src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or ''
 
-        if '/na.mp4' in video_url or '':
+        fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
+        if fmt_base in ('na', 'vn'):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
-        ext = determine_ext(video_url)
+        ext = determine_ext(src_url)
         if ext == 'mpd':
-            # In case MPD cannot be parsed, or anyway, get mp4 combined
-            # formats usually provided to Safari, iOS, and old Windows
+            # extract MPD
             try:
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(
-                    video_url, video_id, 'dash', fatal=False)
-            except KeyError:
+                    src_url, video_id, 'dash', fatal=False)
+            except KeyError:  # fatal doesn't catch this
                 self.report_warning('Failed to parse MPD manifest')
                 formats, subtitles = [], {}
+        elif ext != 'm3u8':
+            formats = [{
+                'url': src_url,
+            }] if src_url else []
+            subtitles = {}
 
+        if src_url:
+            # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
+            fmt_base = base_url(src_url) + fmt_base
+            # prepare for _extract_m3u8_formats_and_subtitles()
+            # hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+            hls_formats = self._extract_m3u8_formats(
+                '{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False)
+            formats.extend(hls_formats)
+            # self._merge_subtitles(hls_subs, target=subtitles)
+
+            # In case MPD/HLS cannot be parsed, or anyway, get mp4 combined
+            # formats usually provided to Safari, iOS, and old Windows
             video = response['options']
             resolutions = (1080, 720, 480, 240, 144)
-            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
-            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
-                if res > highest_res:
-                    continue
-                formats.append({
-                    'url': video_url.replace('.mpd', '_%d.mp4' % res),
-                    'format_id': '%dp' % res,
+            highest_res = traverse_obj(video, (
+                'highestRes', T(int))) or resolutions[0]
+            resolutions = traverse_obj(video, (
+                'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions
+            mp4_formats = traverse_obj(resolutions, (
+                Ellipsis, T(lambda res: {
+                    'url': '{0}_{1}.mp4'.format(fmt_base, res),
+                    'format_id': 'http-{0}'.format(res),
                     'height': res,
-                })
+                })))
             # if above formats are flaky, enable the line below
-            # self._check_formats(formats, video_id)
-        else:
-            formats = [{
-                'url': video_url,
-            }]
-            subtitles = {}
+            # self._check_formats(mp4_formats, video_id)
+            formats.extend(mp4_formats)
+
         self._sort_formats(formats)
 
         webpage = self._download_webpage(url, video_id, fatal=False) or ''

From 48ddab1f3a616d40480818ec9b73eaa27e3a55fa Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 21 Feb 2024 16:29:08 +0000
Subject: [PATCH 310/312] [downloader/external] Fix WgetFD proxy (rev 2)

From PR (defunct source), closes #29343.
Matches https://github.com/yt-dlp/yt-dlp/pull/3152
Thx former user kikuyan.
---
 youtube_dl/downloader/external.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 7fc864e85..bc228960e 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -206,7 +206,10 @@ class WgetFD(ExternalFD):
                 retry[1] = '0'
             cmd += retry
         cmd += self._option('--bind-address', 'source_address')
-        cmd += self._option('--proxy', 'proxy')
+        proxy = self.params.get('proxy')
+        if proxy:
+            for var in ('http_proxy', 'https_proxy'):
+                cmd += ['--execute', '%s=%s' % (var, proxy)]
         cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]

From 70f230f9cf28e948662599b6257cb7d1262870e3 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Thu, 22 Feb 2024 12:44:00 +0000
Subject: [PATCH 311/312] [GBNews]Add new extractor for GB News TV channel
 (#29432)

* Add extractor for GB News TV channel

* Support more GBNews URL formats
Allow alphanumeric and _ in place of `shows`, which redirect to site's preferred URL

* Update for 2024
---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/gbnews.py     | 139 +++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 youtube_dl/extractor/gbnews.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 82221445f..b6d0f42f5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -443,6 +443,7 @@ from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
 from .gazeta import GazetaIE
+from .gbnews import GBNewsIE
 from .gdcvault import GDCVaultIE
 from .gedidigital import GediDigitalIE
 from .generic import GenericIE
diff --git a/youtube_dl/extractor/gbnews.py b/youtube_dl/extractor/gbnews.py
new file mode 100644
index 000000000..f04f30e5a
--- /dev/null
+++ b/youtube_dl/extractor/gbnews.py
@@ -0,0 +1,139 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    ExtractorError,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
+
+
+class GBNewsIE(InfoExtractor):
+    IE_DESC = 'GB News clips, features and live stream'
+
+    # \w+ is normally shows or news, but apparently any word redirects to the correct URL
+    _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
+
+    _PLATFORM = 'safari'
+    _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
+    _TESTS = [{
+        'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
+        'info_dict': {
+            'id': '106889',
+            'ext': 'mp4',
+            'title': "Andrew Neil's message to companies choosing to boycott GB News",
+            'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
+        },
+        'skip': '404 not found',
+    }, {
+        'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
+        'info_dict': {
+            'id': '52264136',
+            'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
+            'ext': 'mp4',
+            'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
+            'description': 'The post was criticised by former employers of the broadcaster',
+        },
+    }, {
+        'url': 'https://www.gbnews.uk/watchlive',
+        'info_dict': {
+            'id': '1069',
+            'display_id': 'watchlive',
+            'ext': 'mp4',
+            'title': 'GB News Live',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url).split('/')[-1]
+
+        webpage = self._download_webpage(url, display_id)
+        # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
+        '''
+        <div id="video-106908"
+            class="simplestream"
+            data-id="GB001"
+            data-type="vod"
+            data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
+            data-token="f9c317c727dc07f515b20036c8ef14a6"
+            data-expiry="1624300052"
+            data-uvid="37900558"
+            data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&"
+            data-npaw="false"
+            data-env="production">
+        '''
+        # exception if no match
+        video_data = self._search_regex(
+            r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
+            webpage, 'video data')
+
+        video_data = extract_attributes(video_data)
+        ss_id = video_data.get('data-id')
+        if not ss_id:
+            raise ExtractorError('Simplestream ID not found')
+
+        json_data = self._download_json(
+            self._SSMP_URL, display_id,
+            note='Downloading Simplestream JSON metadata',
+            errnote='Unable to download Simplestream JSON metadata',
+            query={
+                'id': ss_id,
+                'env': video_data.get('data-env', 'production'),
+            }, fatal=False)
+
+        meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
+        if not meta_url:
+            raise ExtractorError('No API host found')
+
+        uvid = video_data['data-uvid']
+        dtype = video_data.get('data-type')
+        stream_data = self._download_json(
+            '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
+            uvid,
+            query={
+                'key': video_data.get('data-key'),
+                'platform': self._PLATFORM,
+            },
+            headers={
+                'Token': video_data.get('data-token'),
+                'Token-Expiry': video_data.get('data-expiry'),
+                'Uvid': uvid,
+            }, fatal=False)
+
+        stream_url = traverse_obj(stream_data, (
+            'response', 'stream', T(url_or_none)))
+        if not stream_url:
+            raise ExtractorError('No stream data/URL')
+
+        # now known to be a dict
+        stream_data = stream_data['response']
+        drm = stream_data.get('drm')
+        if drm:
+            self.report_drm(uvid)
+
+        formats = self._extract_m3u8_formats(
+            stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
+            fatal=False)
+        # exception if no formats
+        self._sort_formats(formats)
+
+        return {
+            'id': uvid,
+            'display_id': display_id,
+            'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
+                      or self._og_search_title(webpage, default=None)
+                      or display_id.replace('-', ' ').capitalize()),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
+                          or self._og_search_thumbnail(webpage)),
+            'formats': formats,
+            'is_live': (dtype == 'live') or None,
+        }

From 40bd5c18153afe765caa6726302ee1dd8a9a2ce6 Mon Sep 17 00:00:00 2001
From: Aaron Tan <70739609+aaron-tan@users.noreply.github.com>
Date: Thu, 22 Feb 2024 23:54:07 +1100
Subject: [PATCH 312/312] [caffeine.tv] Add new extractor (#32514)

* Add CaffeineTVIE info extractor to support site caffeine.tv

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
---
 youtube_dl/extractor/caffeine.py   | 79 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 80 insertions(+)
 create mode 100644 youtube_dl/extractor/caffeine.py

diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py
new file mode 100644
index 000000000..bffedb9a7
--- /dev/null
+++ b/youtube_dl/extractor/caffeine.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    merge_dicts,
+    parse_iso8601,
+    T,
+    traverse_obj,
+    txt_or_none,
+    urljoin,
+)
+
+
+class CaffeineTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)'
+    _TESTS = [{
+        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
+        'info_dict': {
+            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
+            'ext': 'mp4',
+            'title': 'GOOOOD MORNINNNNN #highlights',
+            'timestamp': 1654702180,
+            'upload_date': '20220608',
+            'uploader': 'TsuSurf',
+            'duration': 3145,
+            'age_limit': 17,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(
+            'https://api.caffeine.tv/social/public/activity/' + video_id,
+            video_id)
+        broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {}
+        title = broadcast_info['broadcast_title']
+        video_url = broadcast_info['video_url']
+
+        ext = determine_ext(video_url)
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', entry_protocol='m3u8',
+                fatal=False)
+        else:
+            formats = [{'url': video_url}]
+        self._sort_formats(formats)
+
+        return merge_dicts({
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }, traverse_obj(json_data, {
+            'uploader': ((None, 'user'), 'username'),
+        }, get_all=False), traverse_obj(json_data, {
+            'like_count': ('like_count', T(int_or_none)),
+            'view_count': ('view_count', T(int_or_none)),
+            'comment_count': ('comment_count', T(int_or_none)),
+            'tags': ('tags', Ellipsis, T(txt_or_none)),
+            'is_live': 'is_live',
+            'uploader': ('user', 'name'),
+        }), traverse_obj(broadcast_info, {
+            'duration': ('content_duration', T(int_or_none)),
+            'timestamp': ('broadcast_start_time', T(parse_iso8601)),
+            'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))),
+            'age_limit': ('content_rating', T(lambda r: r and {
+                # assume Apple Store ratings [1]
+                # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
+                'FOUR_PLUS': 0,
+                'NINE_PLUS': 9,
+                'TWELVE_PLUS': 12,
+                'SEVENTEEN_PLUS': 17,
+            }.get(r, 17))),
+        }))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b6d0f42f5..29b655c94 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -159,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .caffeine import CaffeineTVIE
 from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,