Merge adc6e0196439645953adbcaad00dfa5bcb7a97fa into 673277e510ebd996b62a2fcc76169bf3cce29910

[YouTube] Fix 91b1569
[YouTube] Fix channel playlist extraction (#33074)
2025-07-21 02:44:14 +09:00 · 2025-03-01 02:05:47 +00:00 · 2025-02-28 01:02:20 +00:00 · 2025-02-28 00:02:10 +00:00 · 2022-02-03 17:09:13 +08:00 · 2021-02-25 04:11:18 +08:00
3 changed files with 245 additions and 2 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -645,6 +645,10 @@ from .livestream import (
 )
 from .lnkgo import LnkGoIE
 from .localnews8 import LocalNews8IE
 from .loom import (
    LoomIE,
    LoomFolderIE
 )
 from .lovehomeporn import LoveHomePornIE
 from .lrt import LRTIE
 from .lynda import (
--- a/youtube_dl/extractor/loom.py
+++ b/youtube_dl/extractor/loom.py
@ -0,0 +1,194 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlencode,
    compat_urllib_request
 )
 from ..utils import (
    int_or_none,
    js_to_json,
    try_get,
    unified_timestamp,
    url_or_none
 )
 # Common base class for the Loom extractors defined in this module.
 class LoomBaseInfoIE(InfoExtractor):
    # Site root (with trailing slash); the subclasses build their API,
    # thumbnail and share URLs by appending a relative path to it.
    _BASE_URL = 'https://www.loom.com/'
 class LoomIE(LoomBaseInfoIE):
    """Extractor for a single Loom video shared under loom.com/share/<id>."""
    # The negative lookahead leaves share/folder/... URLs to LoomFolderIE
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?!folder)(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [
        {
            'url': 'https://www.loom.com/share/31b41727a5b24dacb6c1417a565b2ebf',
            'md5': '8b94361aabff2075141dc60bd6d35453',
            'info_dict': {
                'id': '31b41727a5b24dacb6c1417a565b2ebf',
                'ext': 'mp4',
                'title': 'How to resize your camera bubble',
                'uploader': 'Allie Hitchcock',
                'upload_date': '20201007',
                'timestamp': 1602089241
            }
        },
        {
            'url': 'https://www.loom.com/share/7e5168ec3b0744cab5e08a340cc7e086',
            'md5': '47dd14aa1d8054c249b68ca57ad9963f',
            'info_dict': {
                'id': '7e5168ec3b0744cab5e08a340cc7e086',
                'ext': 'mp4',
                'title': 'How to flip your camera ',
                'uploader': 'Matthew Flores',
                'upload_date': '20200423',
                'timestamp': 1587646164
            }
        },
        {
            'url': 'https://www.loom.com/share/6670e3eba3c84dc09ada8306c7138075',
            'md5': 'bfad8181ed49d6252b10dfdeb46c535e',
            'info_dict': {
                'id': '6670e3eba3c84dc09ada8306c7138075',
                'ext': 'mp4',
                'title': 'How to record your first video on Loom',
                'uploader': 'Allie Hitchcock',
                'upload_date': '20201118',
                'timestamp': 1605729404
            }
        }
    ]
    def _real_extract(self, url):
        """
        Extract the info dict for one shared Loom video.

        The share page embeds the video metadata as a JS object assigned to
        window.loomSSRVideo; the media URLs are fetched separately from the
        'transcoded-url' and 'raw-url' session endpoints.

        @param url: share page URL matching _VALID_URL
        @returns    info dict with id, title, formats, thumbnails,
                    description, uploader and timestamp
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        info_json = self._html_search_regex(
            r'window\.loomSSRVideo = (.+?);',
            webpage,
            'info')
        # The second argument is the video id used in error reports, so pass
        # the real id rather than a placeholder string
        info = self._parse_json(info_json, video_id, js_to_json)
        # Dimensions come from the page metadata and are the same for every
        # directly downloadable format
        width = int_or_none(try_get(info, lambda x: x['video_properties']['width']))
        height = int_or_none(try_get(info, lambda x: x['video_properties']['height']))
        formats = []
        for format_type in ('transcoded-url', 'raw-url'):
            # NOTE(review): an empty dict is passed as the request body, as in
            # the original code - confirm the request layer accepts a
            # non-bytes body on all supported Python versions
            json_doc = self._download_json(
                self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + format_type,
                video_id, data={})
            format_url = url_or_none(json_doc.get('url'))
            if not format_url:
                # No usable URL for this variant: skip it instead of handing
                # None to the regex search below
                continue
            ext = self._search_regex(
                r'\.([a-zA-Z0-9]+)\?',
                format_url, 'ext', default=None)
            if ext != 'm3u8':
                formats.append({
                    'url': format_url,
                    'ext': ext,
                    'format_id': format_type,
                    'width': width,
                    'height': height
                })
                continue
            # HLS: the credentials have to be appended to the playlist URL
            # and are also passed on for the segment URLs
            credentials = compat_urllib_parse_urlencode(
                json_doc.get('part_credentials') or {})
            m3u8_formats = self._extract_m3u8_formats(format_url, video_id)
            for item in m3u8_formats:
                item['protocol'] = 'm3u8_native'
                item['ext'] = 'mp4'
                item['format_id'] = 'hls-' + str(item.get('height') or 0)
                if credentials:
                    item['url'] += '?' + credentials
                    item['extra_param_to_segment_url'] = credentials
            # Ordering kept exactly as originally written: every HLS variant
            # but the last is inserted just before the current final entry,
            # and the last variant is appended at the end
            last_index = len(m3u8_formats) - 1
            for index, item in enumerate(m3u8_formats):
                formats.insert(
                    len(formats) if index == last_index else -1,
                    item)
        return {
            'id': info.get('id') or video_id,
            'title': info.get('name'),
            'formats': formats,
            'thumbnails': [
                {
                    'id': key,
                    'url': url_or_none(self._BASE_URL + value)
                    # Tolerate metadata without a thumbnails mapping
                } for key, value in (info.get('thumbnails') or {}).items()
            ],
            'description': info.get('description'),
            'uploader': info.get('owner_full_name'),
            'timestamp': unified_timestamp(info.get('createdAt'))
        }
 class LoomFolderIE(LoomBaseInfoIE):
    """Playlist extractor for Loom folders shared under loom.com/share/folder/."""
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>.+)/?'
    _TESTS = [
        {
            'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20A-%20a%2C%20i%2C%20o',
            'info_dict': {
                'id': '9a8a87f6b6f546d9a400c8e7575ff7f2',
                'title': 'List A- a, i, o'
            },
            'playlist_mincount': 12
        },
        {
            'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',
            'info_dict': {
                'id': '997db4db046f43e5912f10dc5f817b5c',
                'title': 'Blending Lessons '
            },
            'playlist_mincount': 16
        }
    ]
    def _get_real_folder_id(self, path):
        """
        Resolve the path part of a folder URL to a folder id.

        @param path: text matched by the 'id' group of _VALID_URL: a folder id,
                     optionally followed by '/' and a (URL-quoted) sub-folder name
        @returns     the path itself when it names no sub-folder (or cannot be
                     parsed), else the id reported by the by_name endpoint
                     (None if the response lacks it)
        """
        # The greedy 'id' group of _VALID_URL keeps any trailing slash, which
        # the pattern below would not match
        path = path.rstrip('/')
        subfolders = re.match(
            r'^([a-zA-Z0-9]+)(?:\/(.+))*$',
            compat_urllib_parse_unquote(path))
        if subfolders is None:
            # Unexpected shape: use the path as the id rather than crash here
            return path
        parent_folder_id = subfolders.group(1)
        # NOTE(review): everything after the first slash is captured as one
        # name, so nested sub-folders are not split - confirm against the API
        folder_names = subfolders.groups()[1:]
        if folder_names[0] is None:
            return path
        # Fetch folder id
        request = compat_urllib_request.Request(
            self._BASE_URL + 'v1/folders/by_name',
            json.dumps({
                'folder_names': folder_names,
                'parent_folder_id': parent_folder_id
            }).encode('utf-8'))
        json_doc = self._download_json(request, parent_folder_id)
        return try_get(json_doc, lambda x: x['current_folder']['id'])
    def _get_folder_info(self, folder_id):
        """
        Collect the metadata and video ids of a folder and all its sub-folders.

        @param folder_id: id of the folder to query
        @returns          dict with 'id', 'title', 'description' and 'entries';
                          'entries' lists the video ids, sub-folder videos first
        """
        json_doc = self._download_json(url_or_none(self._BASE_URL + 'v1/folders/' + folder_id), folder_id)
        videos = []
        # Recursive call for subfolder
        for folder in json_doc.get('folders') or []:
            subfolder_id = folder.get('id')
            if not subfolder_id:
                continue
            videos.extend(self._get_folder_info(subfolder_id)['entries'])
        # Skip entries without an id: they could not be turned into a URL
        videos.extend(
            val['id'] for val in json_doc.get('videos') or []
            if val.get('id'))
        return {
            'id': folder_id,
            'title': json_doc.get('name'),
            'description': json_doc.get('description'),
            'entries': videos
        }
    def _real_extract(self, url):
        """
        Build the playlist for a shared folder URL.

        @param url: folder URL matching _VALID_URL
        @returns    playlist info dict whose entries are url_result()s that
                    point at the share page of each video
        """
        folder_id = self._get_real_folder_id(self._match_id(url))
        folder_info = self._get_folder_info(folder_id)
        folder_info['_type'] = 'playlist'
        folder_info['entries'] = [
            self.url_result(url_or_none(self._BASE_URL + 'share/' + video_id), 'Loom', video_id)
            for video_id in folder_info['entries']]
        return folder_info
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -27,6 +27,7 @@ from ..compat import (
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
    bug_reports_message,
    clean_html,
    dict_get,
    error_to_compat_str,
@ -65,6 +66,7 @@ from ..utils import (
    url_or_none,
    urlencode_postdata,
    urljoin,
    variadic,
 )
@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            'uploader': uploader,
        }
    @staticmethod
    def _extract_thumbnails(data, *path_list, **kw_final_key):
        """
        Extract thumbnails from thumbnails dict

        @param data: structure handed to traverse_obj() for searching
        @param path_list: path list to level that contains 'thumbnails' key;
                          when no path is given a single empty path is used
        @param final_key: (keyword only) key expected at the end of each path,
                          default 'thumbnails'
        @returns the result of traverse_obj(): for every item found below
                 final_key, a dict built from its 'url', 'height' and 'width'
                 (presumably a list - confirm against traverse_obj())
        """
        # taken from **kwargs, so final_key can only be passed by keyword
        final_key = kw_final_key.get('final_key', 'thumbnails')
        return traverse_obj(data, ((
            # one branch per given path, each extended with (final_key, ...)
            tuple(variadic(path) + (final_key, Ellipsis)
                  for path in path_list or [()])), {
            'url': ('url', T(url_or_none),
                    # Sometimes youtube gives a wrong thumbnail URL. See:
                    # https://github.com/yt-dlp/yt-dlp/issues/233
                    # https://github.com/ytdl-org/youtube-dl/issues/28023
                    # update_url(..., query=None) is applied only to URLs
                    # containing 'maxresdefault'; others pass through unchanged
                    T(lambda u: update_url(u, query=None) if u and 'maxresdefault' in u else u)),
            'height': ('height', T(int_or_none)),
            'width': ('width', T(int_or_none)),
            # entries without a usable 'url' are mapped to None
            # (presumably dropped from the result by traverse_obj() - confirm)
        }, T(lambda t: t if t.get('url') else None)))
    def _search_results(self, query, params):
        data = {
            'context': {
@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            expected_type=txt_or_none)
    def _grid_entries(self, grid_renderer):
-        for item in grid_renderer['items']:
+        for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
-            if not isinstance(item, dict):
+            lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
            if lockup_view_model:
                entry = self._extract_lockup_view_model(lockup_view_model)
                if entry:
                    yield entry
                continue
            renderer = self._extract_grid_item_renderer(item)
            if not isinstance(renderer, dict):
@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                continue
            yield self._extract_video(renderer)
    def _extract_lockup_view_model(self, view_model):
        content_id = view_model.get('contentId')
        if not content_id:
            return
        content_type = view_model.get('contentType')
        if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
            self.report_warning(
                'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True)
            return
        return merge_dicts(self.url_result(
            update_url_query('https://www.youtube.com/playlist', {'list': content_id}),
            ie=YoutubeTabIE.ie_key(), video_id=content_id), {
                'title': traverse_obj(view_model, (
                    'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))),
                'thumbnails': self._extract_thumbnails(view_model, (
                    'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
                    'thumbnailViewModel', 'image'), final_key='sources'),
        })
    def _video_entry(self, video_renderer):
        video_id = video_renderer.get('videoId')
        if video_id:
Author	SHA1	Message	Date
Wong Yiu Hang	fdde6d9326	Merge adc6e0196439645953adbcaad00dfa5bcb7a97fa into 673277e510ebd996b62a2fcc76169bf3cce29910	2025-03-01 02:05:47 +00:00
dirkf	673277e510	[YouTube] Fix 91b1569	2025-02-28 01:02:20 +00:00
dirkf	91b1569f68	[YouTube] Fix channel playlist extraction (#33074 ) * [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_... * resolves #33073 * thx seproDev (yt-dlp/yt-dlp#11615) Co-authored-by: sepro <sepro@sepr0.com>	2025-02-28 00:02:10 +00:00
Wong Yiu Hang	adc6e01964	Release 2021.12.17 -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEE7X9b9Gs7vtgchzaOLDk+DxipI20FAmG7ijgACgkQLDk+Dxip I22UFg//XeU4ZtSI8szkW98sNPd8RRk908h/tNaGkitpd9u1rsQMbspDsjX6BYOn hCaD8Y3aavdQo+9a1uuJVtxfB3qh/ieElAP1VGZ94S5ID3DGQrEOg+/dEvLK2Gpv wH+tAK/pMW4TKrbq6Nb1jRjZLoaUT8Dy8Rz9HzZzzB1w9BaWseb4McsPPOfKbKB1 MEr0gvUViC8wdhc8k7vvL216+P+a/Orws+ClSEHHSkEgCW3aQ36oeZz2K7Cyh85A pOOzdwn/LYhhlyAuqXjAfwk+0pgBEmx2g15Gig/j9CkPmFgpZWAhevfUHFkOgKhH u/eSKoxW/g+lpkjcspJ5jsfDFtv2aAuinJdm40aBq0mmTfHuzzIWwJQvtP0/lLVP GrAyLgGPnZkj3R+jhWVpl/dAO7HPhPGwxuBcjk7GadCvRSq+yR9TCDbUPe9WnaFM /MF1AYQn22lwsZayThoH8GGltOpy4VKPF6fiN/cxFdtglRDcyWbO1shzuzOJHj6D 7BkSH1KIZ/vNzJNNS2PMAQCL6bXC74xPp7oXsG9CnKo0OzBzWadYwYHF+oJn+nSG byq3SSQhsBESLS4CRDC2RbT3uKjheNIYIilqGy80R3JGOHDxG60p3Cb1oNPtRULQ 1B5iPLj7EdtxHmQ+jqssTkbYjOvpi0jRAvIaxQFFRFGvx6ev3AY= =J1bE -----END PGP SIGNATURE----- Merge tag '2021.12.17' into loom Release 2021.12.17	2022-02-03 17:09:13 +08:00
Wong Yiu Hang	e218b26725	[Loom] Add url_or_none back	2021-02-25 04:11:18 +08:00
Wong Yiu Hang	1b2651ed30	[Loom] Use url_result instead	2021-02-25 03:46:55 +08:00
Wong Yiu Hang	70b804526c	[Loom] Move request back into _download_json	2021-02-25 03:20:51 +08:00
Wong Yiu Hang	81bd98a03f	[Loom] Add fallback to mandatory attribute	2021-02-25 03:20:51 +08:00
Wong Yiu Hang	29c4168cec	[Loom] Add missing parsing function	2021-02-25 03:20:51 +08:00
Wong Yiu Hang	34e6a6b559	[Loom] Moved functions to inline Removed if statement parentheses	2021-02-25 03:20:51 +08:00
Wong Yiu Hang	c9f3667e2e	[Loom] Update: Change test case to avoid a false-positive result from test/test_unicode_literals.py	2021-02-04 00:53:18 +08:00
Wong Yiu Hang	287e710bff	[Loom] Add: Additional playlist extractor for folder support	2021-02-04 00:34:05 +08:00
Wong Yiu Hang	918f4f374a	[Loom] Update: Move related member functions into LoomIE	2021-02-04 00:34:05 +08:00
Wong Yiu Hang	14df8ad329	Merge branch 'master' into loom	2021-02-04 00:33:10 +08:00
Wong Yiu Hang	2302f32ced	[Loom] Add new extractor	2021-02-01 16:00:24 +08:00