Merge ce604ae7dce406aaf8fde8f25ee40bd390255b58 into 673277e510ebd996b62a2fcc76169bf3cce29910

[YouTube] Fix 91b1569
[YouTube] Fix channel playlist extraction (#33074)
2025-07-18 01:14:14 +09:00 · 2025-03-08 23:46:08 +00:00 · 2025-02-28 01:02:20 +00:00 · 2025-02-28 00:02:10 +00:00 · 2021-08-03 00:16:49 -04:00 · 2021-08-02 23:27:43 -04:00
3 changed files with 136 additions and 27 deletions
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -393,6 +393,19 @@ class FFmpegFD(ExternalFD):
            # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']
+        http_seekable = info_dict.get('_http_seekable')
+        if http_seekable is not None:
+            # setting -http_seekable prevents ffmpeg from guessing if the server
+            # supports seeking in other kinds of requests (by adding the same header
+            # as above: `Range: bytes=0-`)
+            args += ['-http_seekable', '1' if http_seekable else '0']
+        icy = info_dict.get('_icy')
+        if icy is not None:
+            # setting -icy 0 prevents ffmpeg from sending the header `Icy-Metadata: 1`,
+            # which can also cause problems
+            # https://github.com/ytdl-org/youtube-dl/pull/29688
+            # https://trac.ffmpeg.org/ticket/5460#comment:5
+            args += ['-icy', '1' if icy else '0']

        args += self._configuration_args()

--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -477,43 +477,94 @@ class NBCOlympicsIE(InfoExtractor):
 class NBCOlympicsStreamIE(AdobePassIE):
    IE_NAME = 'nbcolympics:stream'
    _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
-    _TEST = {
-        'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
+    _TESTS = [
+        # "Tokenized" .m3u8 source URL
+        {
+            'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
            'info_dict': {
-            'id': '203493',
+                'id': '2019740',
                'ext': 'mp4',
-            'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
-    }
+        },
+        # Plain .m3u8 source URL
+        {
+            'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
+            'info_dict': {
+                'id': '2021729',
+                'ext': 'mp4',
+                'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+    ]
    _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+    _LEAP_URL_TEMPLATE = 'https://api-leap.nbcsports.com/feeds/assets/%s?application=NBCOlympics&platform=%s&format=nbc-player&env=staging'

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
-        resource = self._search_regex(
-            r"resource\s*=\s*'(.+)';", webpage,
-            'resource').replace("' + pid + '", pid)
+
        event_config = self._download_json(
            self._DATA_URL_TEMPLATE % ('event_config', pid),
-            pid)['eventConfig']
+            pid,
+            'Downloading event config',
+        )['eventConfig']
+        resource = event_config.get('resourceId', 'NBCOlympics')
        title = self._live_title(event_config['eventTitle'])
-        source_url = self._download_json(
-            self._DATA_URL_TEMPLATE % ('live_sources', pid),
-            pid)['videoSources'][0]['sourceUrl']
+
+        leap_config = self._download_json(
+            self._LEAP_URL_TEMPLATE % (pid, 'desktop'),
+            pid,
+            'Downloading leap config',
+        )
+        source_url = leap_config['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
+
+        ap_resource = self._get_mvpd_resource(
+            resource,
+            re.sub(r'[^\w\d ]+', '', event_config['eventTitle']),
+            pid,
+            event_config.get('ratingId', 'NO VALUE'),
+        )
        media_token = self._extract_mvpd_auth(
-            url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
-        formats = self._extract_m3u8_formats(self._download_webpage(
-            'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
+            url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
+
+        if event_config.get('cdnToken') is True:
+            source_url = self._download_json(
+                'https://tokens.playmakerservices.com/',
+                pid,
+                'Retrieving tokenized URL',
+                data=json.dumps({
+                    'application': 'NBCSports',
+                    'authentication-type': 'adobe-pass',
                    'cdn': 'akamai',
-                'mediaToken': base64.b64encode(media_token.encode()),
-                'resource': base64.b64encode(resource.encode()),
+                    # Indicates that the player communicates its token not via the path
+                    # but via a cookie? NBC's player specifies `'false'` but the field just
+                    # doesn't seem to have an effect.
+                    # 'inPath': 'false',
+                    'pid': pid,
+                    'platform': 'desktop',
+                    'requestorId': 'NBCOlympics',
+                    'resourceId': base64.b64encode(ap_resource.encode()).decode(),
+                    'token': base64.b64encode(media_token.encode()).decode(),
                    'url': source_url,
-            }), pid, 'mp4')
+                    'version': 'v1',
+                }).encode(),
+            )['akamai'][0]['tokenizedUrl']
+
+        formats = self._extract_m3u8_formats(source_url, pid, 'mp4')
+        for f in formats:
+            f['_seekable'] = False
+            f['_http_seekable'] = False
+            f['_icy'] = False
        self._sort_formats(formats)

        return {
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -27,6 +27,7 @@ from ..compat import (
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
+    bug_reports_message,
    clean_html,
    dict_get,
    error_to_compat_str,
@@ -65,6 +66,7 @@ from ..utils import (
    url_or_none,
    urlencode_postdata,
    urljoin,
+    variadic,
 )


@@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            'uploader': uploader,
        }

+    @staticmethod
+    def _extract_thumbnails(data, *path_list, **kw_final_key):
+        """
+        Extract thumbnails from thumbnails dict
+        @param path_list: path list to level that contains 'thumbnails' key
+        """
+        final_key = kw_final_key.get('final_key', 'thumbnails')
+
+        return traverse_obj(data, ((
+            tuple(variadic(path) + (final_key, Ellipsis)
+                  for path in path_list or [()])), {
+            'url': ('url', T(url_or_none),
+                    # Sometimes youtube gives a wrong thumbnail URL. See:
+                    # https://github.com/yt-dlp/yt-dlp/issues/233
+                    # https://github.com/ytdl-org/youtube-dl/issues/28023
+                    T(lambda u: update_url(u, query=None) if u and 'maxresdefault' in u else u)),
+            'height': ('height', T(int_or_none)),
+            'width': ('width', T(int_or_none)),
+        }, T(lambda t: t if t.get('url') else None)))
+
    def _search_results(self, query, params):
        data = {
            'context': {
@@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            expected_type=txt_or_none)

    def _grid_entries(self, grid_renderer):
-        for item in grid_renderer['items']:
-            if not isinstance(item, dict):
+        for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
+            lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
+            if lockup_view_model:
+                entry = self._extract_lockup_view_model(lockup_view_model)
+                if entry:
+                    yield entry
                continue
            renderer = self._extract_grid_item_renderer(item)
            if not isinstance(renderer, dict):
@@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                continue
            yield self._extract_video(renderer)

+    def _extract_lockup_view_model(self, view_model):
+        content_id = view_model.get('contentId')
+        if not content_id:
+            return
+        content_type = view_model.get('contentType')
+        if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+            self.report_warning(
+                'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
+            return
+        return merge_dicts(self.url_result(
+            update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
+            ie=YoutubeTabIE.ie_key(), video_id=content_id), {
+                'title': traverse_obj(view_model, (
+                    'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
+                'thumbnails': self._extract_thumbnails(view_model, (
+                    'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
+                    'thumbnailViewModel', 'image'), final_key='sources'),
+        })
+
    def _video_entry(self, video_renderer):
        video_id = video_renderer.get('videoId')
        if video_id:
Author	SHA1	Message	Date
Nikhil Chelliah	89896d2c58	Merge ce604ae7dce406aaf8fde8f25ee40bd390255b58 into 673277e510ebd996b62a2fcc76169bf3cce29910	2025-03-08 23:46:08 +00:00
dirkf	673277e510	[YouTube] Fix 91b1569	2025-02-28 01:02:20 +00:00
dirkf	91b1569f68	[YouTube] Fix channel playlist extraction (#33074 ) * [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_... * resolves #33073 * thx seproDev (yt-dlp/yt-dlp#11615) Co-authored-by: sepro <sepro@sepr0.com>	2025-02-28 00:02:10 +00:00
nikhil	ce604ae7dc	Support _non_-tokenized source URLs too	2021-08-03 00:16:49 -04:00
nikhil	b675a6e6b9	Satisfy flake8, coding conventions, tests	2021-08-02 23:27:43 -04:00
nikhil	b511872fbf	Unset ffmpeg's `-seekable`, `-http_seekable`, and `-icy` flags	2021-08-02 20:39:20 -04:00
nikhil	236e3d30ba	More fixes	2021-08-01 18:45:06 -04:00
nikhil	e7f4793d4d	[extractor/nbc] Fix NBC Olympics extractor	2021-07-29 11:31:27 -04:00