From ab7c61ca29ed1d1216d463d01794eb112a9144d5 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Jan 2025 01:22:16 +0000
Subject: [PATCH 1/8] [YouTube] Apply code style changes, trailing commas, etc

---
 youtube_dl/extractor/youtube.py | 60 +++++++++++++++++----------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7ea30fd40..e9603d155 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -9,6 +9,7 @@ import json
 import os.path
 import random
 import re
+import string
 import time
 import traceback
 
@@ -67,6 +68,7 @@ from ..utils import (
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
+
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
 
@@ -138,7 +140,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 [2, 1, None, 1,
                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                  None, [], 4],
-                1, [None, None, []], None, None, None, True
+                1, [None, None, []], None, None, None, True,
             ],
             username,
         ]
@@ -160,7 +162,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             None, 1, None, [1, None, None, None, [password, None, True]],
             [
                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
-                1, [None, None, []], None, None, None, True
+                1, [None, None, []], None, None, None, True,
             ]]
 
         challenge_results = req(
@@ -213,7 +215,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                     user_hash, None, 2, None,
                     [
                         9, None, None, None, None, None, None, None,
-                        [None, tfa_code, True, 2]
+                        [None, tfa_code, True, 2],
                     ]]
 
                 tfa_results = req(
@@ -284,7 +286,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             'client': {
                 'clientName': 'WEB',
                 'clientVersion': '2.20201021.03.00',
-            }
+            },
         },
     }
 
@@ -385,7 +387,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 'client': {
                     'clientName': 'WEB',
                     'clientVersion': '2.20201021.03.00',
-                }
+                },
             },
             'query': query,
         }
@@ -462,7 +464,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             #       (HTML, videodetails, metadata, renderers)
             'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
             'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
-                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
+                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']),
         }
         if any((videodetails, metadata, renderers)):
             result = (
@@ -671,7 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                 'description': '',
                 'uploader': '8KVIDEO',
-                'title': 'UHDTV TEST 8K VIDEO.mp4'
+                'title': 'UHDTV TEST 8K VIDEO.mp4',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
@@ -711,7 +713,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
-            }
+            },
         },
         # Age-gated videos
         {
@@ -839,7 +841,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
             'expected_warnings': [
                 'DASH manifest missing',
-            ]
+            ],
         },
         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
         {
@@ -1820,8 +1822,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # cpn generation algorithm is reverse engineered from base.js.
         # In fact it works even with dummy cpn.
-        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
-        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+        CPN_ALPHABET = string.ascii_letters + string.digits + '-_'
+        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
 
         # more consistent results setting it to right before the end
         qs = parse_qs(playback_url)
@@ -1881,8 +1883,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
         if mobj is None:
             raise ExtractorError('Invalid URL: %s' % url)
-        video_id = mobj.group(2)
-        return video_id
+        return mobj.group(2)
 
     def _extract_chapters_from_json(self, data, video_id, duration):
         chapters_list = try_get(
@@ -2035,7 +2036,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             headers = {
                 'X-YouTube-Client-Name': '85',
                 'X-YouTube-Client-Version': '2.0',
-                'Origin': 'https://www.youtube.com'
+                'Origin': 'https://www.youtube.com',
             }
 
             video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
@@ -2064,8 +2065,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
 
         search_meta = (
-            lambda x: self._html_search_meta(x, webpage, default=None)) \
-            if webpage else lambda x: None
+            (lambda x: self._html_search_meta(x, webpage, default=None))
+            if webpage else lambda _: None)
 
         video_details = player_response.get('videoDetails') or {}
         microformat = try_get(
@@ -2137,7 +2138,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         def build_fragments(f):
             return LazyList({
                 'url': update_url_query(f['url'], {
-                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
+                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize'])),
                 })
             } for range_start in range(0, f['filesize'], CHUNK_SIZE))
 
@@ -2236,7 +2237,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'protocol': 'http_dash_segments',
                     'fragments': build_fragments(dct),
                 } if dct['filesize'] else {
-                    'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
+                    'downloader_options': {'http_chunk_size': CHUNK_SIZE},  # No longer useful?
                 })
 
             formats.append(dct)
@@ -2454,7 +2455,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             info['subtitles'] = subtitles
 
         parsed_url = compat_urllib_parse_urlparse(url)
-        for component in [parsed_url.fragment, parsed_url.query]:
+        for component in (parsed_url.fragment, parsed_url.query):
             query = compat_parse_qs(component)
             for k, v in query.items():
                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
@@ -2684,7 +2685,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'title': 'Super Cooper Shorts - Shorts',
             'uploader': 'Super Cooper Shorts',
             'uploader_id': '@SuperCooperShorts',
-        }
+        },
     }, {
         # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
         'url': 'https://www.youtube.com/@emergencyawesome/shorts',
@@ -2738,7 +2739,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
             'uploader': 'ThirstForScience',
             'uploader_id': '@ThirstForScience',
-        }
+        },
     }, {
         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
         'only_matching': True,
@@ -3037,7 +3038,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'uploader': '3Blue1Brown',
             'uploader_id': '@3blue1brown',
             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
-        }
+        },
     }]
 
     @classmethod
@@ -3335,7 +3336,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'client': {
                 'clientName': 'WEB',
                 'clientVersion': client_version,
-            }
+            },
         }
         visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
 
@@ -3354,7 +3355,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                 headers['x-goog-visitor-id'] = visitor_data
             data['continuation'] = continuation['continuation']
             data['clickTracking'] = {
-                'clickTrackingParams': continuation['itct']
+                'clickTrackingParams': continuation['itct'],
             }
             count = 0
             retries = 3
@@ -3613,7 +3614,7 @@ class YoutubePlaylistIE(InfoExtractor):
             'uploader': 'milan',
             'uploader_id': '@milan5503',
             'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
-        }
+        },
     }, {
         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
         'playlist_mincount': 455,
@@ -3623,7 +3624,7 @@ class YoutubePlaylistIE(InfoExtractor):
             'uploader': 'LBK',
             'uploader_id': '@music_king',
             'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
-        }
+        },
     }, {
         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
         'only_matching': True,
@@ -3734,7 +3735,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
         'info_dict': {
             'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
-        }
+        },
     }]
 
     def _get_n_results(self, query, n):
@@ -3754,7 +3755,7 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
         'info_dict': {
             'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
-        }
+        },
     }]
 
 
@@ -3769,7 +3770,7 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
             'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
         },
-        'params': {'playlistend': 5}
+        'params': {'playlistend': 5},
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,
@@ -3785,6 +3786,7 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
     """
     Base class for feed extractors
+
     Subclasses must define the _FEED_NAME property.
     """
     _LOGIN_REQUIRED = True

From 00ad2b8ca12d4f9b830ed83876d0d1ab3d698675 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Jan 2025 01:24:30 +0000
Subject: [PATCH 2/8] [YouTube] Refactor subtitle processing * move to internal
 function * use `traverse_obj()`

---
 youtube_dl/extractor/youtube.py | 46 +++++++++++++++++----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e9603d155..56957a661 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2415,9 +2415,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'is_live': is_live,
         }
 
-        pctr = try_get(
+        pctr = traverse_obj(
             player_response,
-            lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
+            ('captions', 'playerCaptionsTracklistRenderer', T(dict)))
         if pctr:
             def process_language(container, base_url, lang_code, query):
                 lang_subs = []
@@ -2431,28 +2431,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     })
                 container[lang_code] = lang_subs
 
-            subtitles = {}
-            for caption_track in (pctr.get('captionTracks') or []):
-                base_url = caption_track.get('baseUrl')
-                if not base_url:
-                    continue
-                if caption_track.get('kind') != 'asr':
-                    lang_code = caption_track.get('languageCode')
-                    if not lang_code:
+            def process_subtitles():
+                subtitles = {}
+                for caption_track in traverse_obj(pctr, (
+                        'captionTracks', lambda _, v: v.get('baseUrl'))):
+                    if not base_url:
                         continue
-                    process_language(
-                        subtitles, base_url, lang_code, {})
-                    continue
-                automatic_captions = {}
-                for translation_language in (pctr.get('translationLanguages') or []):
-                    translation_language_code = translation_language.get('languageCode')
-                    if not translation_language_code:
+                    if caption_track.get('kind') != 'asr':
+                        lang_code = caption_track.get('languageCode')
+                        if not lang_code:
+                            continue
+                        process_language(
+                            subtitles, base_url, lang_code, {})
                         continue
-                    process_language(
-                        automatic_captions, base_url, translation_language_code,
-                        {'tlang': translation_language_code})
-                info['automatic_captions'] = automatic_captions
-            info['subtitles'] = subtitles
+                    automatic_captions = {}
+                    for translation_language in traverse_obj(pctr, (
+                            'translationLanguages', lambda _, v: v.get('languageCode'))):
+                        translation_language_code = translation_language['languageCode']
+                        process_language(
+                            automatic_captions, base_url, translation_language_code,
+                            {'tlang': translation_language_code})
+                    info['automatic_captions'] = automatic_captions
+                info['subtitles'] = subtitles
+
+            process_subtitles()
 
         parsed_url = compat_urllib_parse_urlparse(url)
         for component in (parsed_url.fragment, parsed_url.query):

From 1036478d130c5f2001eca2d7d12558abe601d933 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 6 Jan 2025 01:39:04 +0000
Subject: [PATCH 3/8] [YouTube] Ensure subtitle URLs are complete * WEB URLs
 are, MWEB not * resolves #33017

---
 youtube_dl/extractor/youtube.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 56957a661..6171df84a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2435,6 +2435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 subtitles = {}
                 for caption_track in traverse_obj(pctr, (
                         'captionTracks', lambda _, v: v.get('baseUrl'))):
+                    base_url = self._yt_urljoin(caption_track['baseUrl'])
                     if not base_url:
                         continue
                     if caption_track.get('kind') != 'asr':

From 21fff051217d1c14a7d50fa752052eadbcafee4e Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 15 Jan 2025 03:19:15 +0000
Subject: [PATCH 4/8] [YouTube] Switch to TV API client * thx
 yt-dlp/yt-dlp#12059

---
 youtube_dl/extractor/youtube.py | 102 ++++++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 19 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6171df84a..1424277ac 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -31,7 +31,9 @@ from ..utils import (
     dict_get,
     error_to_compat_str,
     ExtractorError,
+    filter_dict,
     float_or_none,
+    get_first,
     extract_attributes,
     get_element_by_attribute,
     int_or_none,
@@ -82,6 +84,34 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
 
+    _INNERTUBE_CLIENTS = {
+        # mweb has 'ultralow' formats
+        # See: https://github.com/yt-dlp/yt-dlp/pull/557
+        'mweb': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'MWEB',
+                    'clientVersion': '2.20241202.07.00',
+                    # mweb previously did not require PO Token with this UA
+                    'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
+            'REQUIRE_PO_TOKEN': True,
+            'SUPPORTS_COOKIES': True,
+        },
+        'tv': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'TVHTML5',
+                    'clientVersion': '7.20241201.18.00',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
+            'SUPPORTS_COOKIES': True,
+        },
+    }
+
     def _login(self):
         """
         Attempt to log in to YouTube.
@@ -321,19 +351,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
         return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
 
-    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
+    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
+                  note='Downloading API JSON'):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
         real_headers = {'content-type': 'application/json'}
         if headers:
             real_headers.update(headers)
 
+        # was: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+        api_key = self.get_param('youtube_innertube_key')
         return self._download_json(
             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
-            note='Downloading API JSON', errnote='Unable to download API page',
+            note=note, errnote='Unable to download API page',
             data=json.dumps(data).encode('utf8'), fatal=fatal,
-            headers=real_headers,
-            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+            headers=real_headers, query=filter_dict({
+                'key': api_key,
+                'prettyPrint': 'false',
+            }))
 
     def _extract_yt_initial_data(self, video_id, webpage):
         return self._parse_json(
@@ -342,6 +377,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
             video_id)
 
+    def _extract_visitor_data(self, *args):
+        """
+        Extract visitorData from an API response or ytcfg
+
+        Appears to be used to track session state
+        """
+        visitor_data = self.get_param('youtube_visitor_data')
+        if visitor_data:
+            return visitor_data
+
+        return get_first(
+            args, (('VISITOR_DATA',
+                    ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
+                    ('responseContext', 'visitorData')),
+                   T(compat_str)))
+
     def _extract_ytcfg(self, video_id, webpage):
         return self._parse_json(
             self._search_regex(
@@ -1957,6 +2008,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if sts:
                 pb_context['signatureTimestamp'] = sts
 
+            client = traverse_obj(self._INNERTUBE_CLIENTS, (
+                lambda _, v: not v.get('REQUIRE_PO_TOKEN')),
+                get_all=False)
+
             query = {
                 'playbackContext': {
                     'contentPlaybackContext': pb_context,
@@ -1964,30 +2019,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'racyCheckOk': True,
                 },
                 'context': {
-                    'client': {
-                        'clientName': 'MWEB',
-                        'clientVersion': '2.20241202.07.00',
-                        'hl': 'en',
-                        'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
-                        'timeZone': 'UTC',
-                        'utcOffsetMinutes': 0,
-                    },
+                    'client': merge_dicts(
+                        traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
+                            'hl': 'en',
+                            'timeZone': 'UTC',
+                            'utcOffsetMinutes': 0,
+                        }),
                 },
                 'videoId': video_id,
             }
-            headers = {
-                'X-YouTube-Client-Name': '2',
-                'X-YouTube-Client-Version': '2.20241202.07.00',
-                'Origin': origin,
+
+            headers = merge_dicts({
                 'Sec-Fetch-Mode': 'navigate',
-                'User-Agent': query['context']['client']['userAgent'],
-            }
+                'Origin': origin,
+                # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
+            }, traverse_obj(client, {
+                'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
+                'X-YouTube-Client-Version': (
+                    'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
+                'User-Agent': (
+                    'INNERTUBE_CONTEXT', 'client', 'userAgent'),
+            }))
+
             auth = self._generate_sapisidhash_header(origin)
             if auth is not None:
                 headers['Authorization'] = auth
                 headers['X-Origin'] = origin
 
-            player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
+            player_response = self._call_api(
+                'player', query, video_id, fatal=False, headers=headers,
+                note=join_nonempty(
+                    'Downloading', traverse_obj(query, (
+                        'context', 'client', 'clientName')),
+                    'API JSON', delim=' '))
 
         def is_agegated(playability):
             if not isinstance(playability, dict):

From 55ad8a24cacee03a91fe70d8d48aa9a02cc0ab11 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 15 Jan 2025 03:22:56 +0000
Subject: [PATCH 5/8] [YouTube] Support `...
 /feeds/videos.xml?playlist_id={pl_id}`

---
 youtube_dl/extractor/youtube.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1424277ac..f0406b357 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3601,10 +3601,23 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     def _real_extract(self, url):
         item_id = self._match_id(url)
         url = update_url(url, netloc='www.youtube.com')
-        # Handle both video/playlist URLs
         qs = parse_qs(url)
-        video_id = qs.get('v', [None])[0]
-        playlist_id = qs.get('list', [None])[0]
+
+        def qs_get(key, default=None):
+            return qs.get(key, [default])[-1]
+
+        # Go around for /feeds/videos.xml?playlist_id={pl_id}
+        if item_id == 'feeds' and '/feeds/videos.xml?' in url:
+            playlist_id = qs_get('playlist_id')
+            if playlist_id:
+                return self.url_result(
+                    update_url_query('https://www.youtube.com/playlist', {
+                        'list': playlist_id,
+                    }), ie=self.ie_key(), video_id=playlist_id)
+
+        # Handle both video/playlist URLs
+        video_id = qs_get('v')
+        playlist_id = qs_get('list')
         if video_id and playlist_id:
             if self._downloader.params.get('noplaylist'):
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

From b09442a2f4a8d255569abf0bb6b4867c53d0c2e9 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sun, 19 Jan 2025 01:18:34 +0000
Subject: [PATCH 6/8] [YouTube] Also use ios client when is_live

---
 youtube_dl/extractor/youtube.py | 118 ++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 37 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f0406b357..32e836d49 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -85,6 +85,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
 
     _INNERTUBE_CLIENTS = {
+        'ios': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'IOS',
+                    'clientVersion': '19.45.4',
+                    'deviceMake': 'Apple',
+                    'deviceModel': 'iPhone16,2',
+                    'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
+                    'osName': 'iPhone',
+                    'osVersion': '18.1.0.22B83',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+            'REQUIRE_JS_PLAYER': False,
+            'REQUIRE_PO_TOKEN': True,
+        },
         # mweb has 'ultralow' formats
         # See: https://github.com/yt-dlp/yt-dlp/pull/557
         'mweb': {
@@ -110,6 +126,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
             'SUPPORTS_COOKIES': True,
         },
+        'web': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'WEB',
+                    'clientVersion': '2.20241126.01.00',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
+            'REQUIRE_PO_TOKEN': True,
+            'SUPPORTS_COOKIES': True,
+        },
     }
 
     def _login(self):
@@ -1995,6 +2022,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             player_response = self._extract_yt_initial_variable(
                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                 video_id, 'initial player response')
+        is_live = traverse_obj(player_response, ('videoDetails', 'isLive'))
+
         if False and not player_response:
             player_response = self._call_api(
                 'player', {'videoId': video_id}, video_id)
@@ -2008,50 +2037,65 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if sts:
                 pb_context['signatureTimestamp'] = sts
 
-            client = traverse_obj(self._INNERTUBE_CLIENTS, (
-                lambda _, v: not v.get('REQUIRE_PO_TOKEN')),
-                get_all=False)
+            client_names = traverse_obj(self._INNERTUBE_CLIENTS, (
+                T(dict.items), lambda _, k_v: not k_v[1].get('REQUIRE_PO_TOKEN'),
+                0))[:1]
 
-            query = {
-                'playbackContext': {
-                    'contentPlaybackContext': pb_context,
-                    'contentCheckOk': True,
-                    'racyCheckOk': True,
-                },
-                'context': {
-                    'client': merge_dicts(
-                        traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
-                            'hl': 'en',
-                            'timeZone': 'UTC',
-                            'utcOffsetMinutes': 0,
-                        }),
-                },
-                'videoId': video_id,
-            }
+            if is_live and 'ios' not in client_names:
+                client_names.append('ios')
 
-            headers = merge_dicts({
+            headers = {
                 'Sec-Fetch-Mode': 'navigate',
                 'Origin': origin,
                 # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
-            }, traverse_obj(client, {
-                'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
-                'X-YouTube-Client-Version': (
-                    'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
-                'User-Agent': (
-                    'INNERTUBE_CONTEXT', 'client', 'userAgent'),
-            }))
-
+            }
             auth = self._generate_sapisidhash_header(origin)
             if auth is not None:
                 headers['Authorization'] = auth
                 headers['X-Origin'] = origin
 
-            player_response = self._call_api(
-                'player', query, video_id, fatal=False, headers=headers,
-                note=join_nonempty(
-                    'Downloading', traverse_obj(query, (
-                        'context', 'client', 'clientName')),
-                    'API JSON', delim=' '))
+            for client in traverse_obj(self._INNERTUBE_CLIENTS, (client_names, T(dict))):
+
+                query = {
+                    'playbackContext': {
+                        'contentPlaybackContext': pb_context,
+                        'contentCheckOk': True,
+                        'racyCheckOk': True,
+                    },
+                    'context': {
+                        'client': merge_dicts(
+                            traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
+                                'hl': 'en',
+                                'timeZone': 'UTC',
+                                'utcOffsetMinutes': 0,
+                            }),
+                    },
+                    'videoId': video_id,
+                }
+
+                api_headers = merge_dicts(headers, traverse_obj(client, {
+                    'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
+                    'X-YouTube-Client-Version': (
+                        'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
+                    'User-Agent': (
+                        'INNERTUBE_CONTEXT', 'client', 'userAgent'),
+                }))
+
+                api_player_response = self._call_api(
+                    'player', query, video_id, fatal=False, headers=api_headers,
+                    note=join_nonempty(
+                        'Downloading', traverse_obj(query, (
+                            'context', 'client', 'clientName')),
+                        'API JSON', delim=' '))
+
+                hls = [
+                    traverse_obj(
+                        resp, ('streamingData', 'hlsManifestUrl', T(url_or_none)))
+                    for resp in (player_response, api_player_response)]
+                if not hls[0] and hls[1]:
+                    player_response['streamingData']['hlsManifestUrl'] = hls[1]
+                else:
+                    player_response.update(api_player_response or {})
 
         def is_agegated(playability):
             if not isinstance(playability, dict):
@@ -2194,6 +2238,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         itag_qualities = {}
         q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
         CHUNK_SIZE = 10 << 20
+        is_live = video_details.get('isLive')
 
         streaming_data = player_response.get('streamingData') or {}
         streaming_formats = streaming_data.get('formats') or []
@@ -2338,7 +2383,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         hls_manifest_url = streaming_data.get('hlsManifestUrl')
         if hls_manifest_url:
             for f in self._extract_m3u8_formats(
-                    hls_manifest_url, video_id, 'mp4', fatal=False):
+                    hls_manifest_url, video_id, 'mp4',
+                    entry_protocol='m3u8_native', live=is_live, fatal=False):
                 if process_manifest_format(
                         f, 'hls', None, self._search_regex(
                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
@@ -2444,8 +2490,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 # Strictly de-prioritize damaged formats
                 f['preference'] = -10
 
-        is_live = video_details.get('isLive')
-
         owner_profile_url = self._yt_urljoin(self._extract_author_var(
             webpage, 'url', videodetails=video_details, metadata=microformat))
 

From 63fb0fc4159397618b12fa115f957b9ba70f3f88 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Mon, 20 Jan 2025 13:23:54 +0000
Subject: [PATCH 7/8] [YouTube] Retain .videoDetails members from all player
 responses

---
 youtube_dl/extractor/youtube.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 32e836d49..edaae5bd3 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2095,7 +2095,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if not hls[0] and hls[1]:
                     player_response['streamingData']['hlsManifestUrl'] = hls[1]
                 else:
+                    video_details = merge_dicts(*traverse_obj(
+                        (player_response, api_player_response),
+                        (Ellipsis, 'videoDetails', T(dict))))
                     player_response.update(api_player_response or {})
+                    player_response['videoDetails'] = video_details
 
         def is_agegated(playability):
             if not isinstance(playability, dict):

From 5975d7bb96095fae7c35e7cfcd819255a5b57087 Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Wed, 22 Jan 2025 06:52:40 +0000
Subject: [PATCH 8/8] [YouTube] Use X-Goog-Visitor-Id

* required with tv player client
* resolves #33030

---
 youtube_dl/extractor/youtube.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index edaae5bd3..c93a2a1f9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -142,6 +142,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _login(self):
         """
         Attempt to log in to YouTube.
+
         True is returned if successful or skipped.
         False is returned if login failed.
 
@@ -2040,6 +2041,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             client_names = traverse_obj(self._INNERTUBE_CLIENTS, (
                 T(dict.items), lambda _, k_v: not k_v[1].get('REQUIRE_PO_TOKEN'),
                 0))[:1]
+            if 'web' not in client_names:
+                # webpage links won't download: ignore links and playability
+                player_response = filter_dict(
+                    player_response or {},
+                    lambda k, _: k not in ('streamingData', 'playabilityStatus'))
 
             if is_live and 'ios' not in client_names:
                 client_names.append('ios')
@@ -2047,7 +2053,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             headers = {
                 'Sec-Fetch-Mode': 'navigate',
                 'Origin': origin,
-                # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
+                'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
             }
             auth = self._generate_sapisidhash_header(origin)
             if auth is not None:
@@ -2059,9 +2065,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 query = {
                     'playbackContext': {
                         'contentPlaybackContext': pb_context,
-                        'contentCheckOk': True,
-                        'racyCheckOk': True,
                     },
+                    'contentCheckOk': True,
+                    'racyCheckOk': True,
                     'context': {
                         'client': merge_dicts(
                             traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
@@ -2088,11 +2094,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             'context', 'client', 'clientName')),
                         'API JSON', delim=' '))
 
-                hls = [
-                    traverse_obj(
-                        resp, ('streamingData', 'hlsManifestUrl', T(url_or_none)))
-                    for resp in (player_response, api_player_response)]
-                if not hls[0] and hls[1]:
+                hls = traverse_obj(
+                    (player_response, api_player_response),
+                    (Ellipsis, 'streamingData', 'hlsManifestUrl', T(url_or_none)))
+                if len(hls) == 2 and not hls[0] and hls[1]:
                     player_response['streamingData']['hlsManifestUrl'] = hls[1]
                 else:
                     video_details = merge_dicts(*traverse_obj(
@@ -3467,7 +3472,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             if not continuation:
                 break
             if visitor_data:
-                headers['x-goog-visitor-id'] = visitor_data
+                headers['X-Goog-Visitor-Id'] = visitor_data
             data['continuation'] = continuation['continuation']
             data['clickTracking'] = {
                 'clickTrackingParams': continuation['itct'],