Merge 9680022207 into 1036478d13

[YouTube] Ensure subtitle URLs are complete
* WEB URLs are complete, MWEB URLs are not * resolves #33017
2025-01-10 13:30:09 +09:00 · 2025-01-06 19:20:30 +02:00 · 2025-01-06 01:39:04 +00:00 · 2025-01-06 01:24:30 +00:00 · 2025-01-06 01:22:16 +00:00 · 2021-12-13 00:08:02 +02:00
4 changed files with 346 additions and 51 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1078,6 +1078,10 @@ from .rutube import (
    RutubePersonIE,
    RutubePlaylistIE,
 )
 from .glomex import (
    GlomexIE,
    GlomexEmbedIE,
 )
 from .rutv import RUTVIE
 from .ruutu import RuutuIE
 from .ruv import RuvIE
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -102,6 +102,7 @@ from .ustream import UstreamIE
 from .arte import ArteTVEmbedIE
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
 from .glomex import GlomexEmbedIE
 from .limelight import LimelightBaseIE
 from .anvato import AnvatoIE
 from .washingtonpost import WashingtonPostIE
@ -3400,6 +3401,12 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
        # Look for Glomex embeds
        glomex_urls = list(GlomexEmbedIE._extract_urls(webpage, url))
        if glomex_urls:
            return self.playlist_from_matches(
                glomex_urls, video_id, video_title, ie=GlomexEmbedIE.ie_key())
        # Look for WashingtonPost embeds
        wapo_urls = WashingtonPostIE._extract_urls(webpage)
        if wapo_urls:
--- a/youtube_dl/extractor/glomex.py
+++ b/youtube_dl/extractor/glomex.py
@ -0,0 +1,279 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlencode,
 )
 from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
    try_get,
    smuggle_url,
    unsmuggle_url,
    unescapeHTML,
 )
 class GlomexBaseIE(InfoExtractor):
    """Common helpers shared by the Glomex extractors.

    Covers smuggling the embedding page URL into player URLs, querying the
    integration API and converting its response into info dicts.
    """
    _DEFAULT_ORIGIN_URL = 'https://player.glomex.com/'
    _API_URL = 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/'
    # Prefix of a Glomex ID (text before the first '-') -> label used in the
    # download progress/error messages
    _VIDEOID_TYPES = {
        'v': 'video',
        'pl': 'playlist',
        'rl': 'related videos playlist',
        'cl': 'curated playlist',
    }
    @staticmethod
    def _smuggle_origin_url(url, origin_url):
        """Return url with origin_url smuggled into it under the 'origin' key."""
        return smuggle_url(url, {'origin': origin_url})
    @classmethod
    def _unsmuggle_origin_url(cls, url, fallback_origin_url=None):
        """Split a possibly smuggled URL into (plain URL, origin URL).

        When no origin was smuggled (or it is empty), fallback_origin_url is
        returned, or _DEFAULT_ORIGIN_URL if that is not given either.
        """
        default_origin = fallback_origin_url or cls._DEFAULT_ORIGIN_URL
        unsmuggled_url, data = unsmuggle_url(
            url, default={'origin': default_origin})
        # Smuggled data may lack the 'origin' key or carry None for it: fall
        # back to the default instead of raising KeyError / returning None
        return unsmuggled_url, (data or {}).get('origin') or default_origin
    def _get_videoid_type(self, video_id):
        """Return a label for the kind of item video_id names, by its prefix."""
        prefix = video_id.split('-')[0]
        return self._VIDEOID_TYPES.get(prefix, 'unknown type')
    def _download_api_data(self, video_id, integration, current_url=None):
        """Download the API JSON for video_id within the given integration.

        current_url is sent as the 'current_url' query parameter;
        _DEFAULT_ORIGIN_URL is sent when it is empty.
        """
        query = {
            'integration_id': integration,
            'playlist_id': video_id,
            'current_url': current_url or self._DEFAULT_ORIGIN_URL,
        }
        video_id_type = self._get_videoid_type(video_id)
        return self._download_json(
            self._API_URL,
            video_id, 'Downloading %s JSON' % video_id_type,
            'Unable to download %s JSON' % video_id_type,
            query=query)
    def _download_and_extract_api_data(self, video_id, integration, current_url):
        """Query the API and return an info dict or a playlist result.

        A single video in the response is returned as one info dict; several
        videos are returned as a playlist with video_id as the playlist ID.
        Raises ExtractorError when the response holds no videos.
        """
        api_data = self._download_api_data(video_id, integration, current_url)
        # A reply without a 'videos' list is reported like an empty one
        # rather than surfacing as a bare KeyError
        videos = try_get(api_data, lambda x: x['videos'], list)
        if not videos:
            raise ExtractorError('no videos found for %s' % video_id)
        if len(videos) == 1:
            return self._extract_api_data(videos[0], video_id)
        # assume some kind of playlist
        entries = [
            self._extract_api_data(video, video_id)
            for video in videos
        ]
        return self.playlist_result(entries, video_id)
    def _extract_api_data(self, video, video_id):
        """Turn one video object of the API response into a full info dict."""
        if video.get('error_code') == 'contentGeoblocked':
            # .get(): a missing country list must not hide the geo error
            self.raise_geo_restricted(countries=video.get('geo_locations'))
        info = self._extract_info(video, video_id)
        info['formats'] = self._extract_formats(video, video_id)
        return info
    @staticmethod
    def _extract_info(video, video_id=None, require_title=True):
        """Build the metadata part (no formats) of an info dict from video.

        video_id is used as the ID when video has no 'clip_id'. With
        require_title set, a missing 'title' raises KeyError.
        """
        title = video['title'] if require_title else video.get('title')
        def append_image_url(url, default='profile:player-960x540'):
            # Returns None (implicitly) when there is no base URL
            if url:
                return '%s/%s' % (url, default)
        thumbnail = append_image_url(try_get(video,
                                             lambda x: x['image']['url']))
        thumbnails = []
        # 'images' may be absent or null
        for image in video.get('images') or []:
            image_url = append_image_url(image.get('url'))
            if not image_url:
                # an entry without a URL is of no use as a thumbnail
                continue
            entry = {'url': image_url, 'width': 960, 'height': 540}
            if 'id' in image:
                entry['id'] = image['id']
            thumbnails.append(entry)
        return {
            'id': video.get('clip_id') or video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': thumbnail,
            'thumbnails': thumbnails or None,
            'duration': int_or_none(video.get('clip_duration')),
            'timestamp': video.get('created_at'),
        }
    def _extract_formats(self, options, video_id):
        """Return the sorted formats listed under options['source'].

        Sources whose URL has the m3u8 extension are expanded through
        _extract_m3u8_formats; any other source becomes a single format.
        options['language'], when set, is copied to every format.
        """
        formats = []
        # A missing/null 'source' yields no formats and is left for
        # _sort_formats to handle -- presumably it reports that no formats
        # were found; confirm against InfoExtractor
        sources = options.get('source') or {}
        for format_id, format_url in sources.items():
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id,
                    fatal=False))
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
        language = options.get('language')
        if language:
            for fmt in formats:
                fmt['language'] = language
        self._sort_formats(formats)
        return formats
 class GlomexIE(GlomexBaseIE):
    """Extractor for video.glomex.com pages; defers to GlomexEmbedIE."""
    IE_NAME = 'glomex'
    IE_DESC = 'Glomex videos'
    _VALID_URL = r'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)'
    # Hard-coded integration ID for video.glomex.com
    _INTEGRATION_ID = '19syy24xjn1oqlpc'
    _TEST = {
        'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
        'md5': 'cec33a943c4240c9cb33abea8c26242e',
        'info_dict': {
            'id': 'v-cb24uwg77hgh',
            'ext': 'mp4',
            'title': 'md5:38a90cedcfadd72982c81acf13556e0c',
            'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
            'duration': 29600,
            'timestamp': 1619895017,
            'upload_date': '20210501',
            'age_limit': None,
        },
    }
    def _real_extract(self, url):
        """Return a url_result pointing at the equivalent player URL.

        The player URL is built from the matched video ID and the hard-coded
        integration ID, with the page URL passed along as the origin.
        """
        clip_id = self._match_id(url)
        player_url = GlomexEmbedIE.build_player_url(
            clip_id, self._INTEGRATION_ID, url)
        return self.url_result(player_url, GlomexEmbedIE.ie_key(), clip_id)
 class GlomexEmbedIE(GlomexBaseIE):
    """Extractor for Glomex player URLs and for embeds found in web pages."""
    IE_NAME = 'glomex:embed'
    IE_DESC = 'Glomex embedded videos'
    _BASE_PLAYER_URL = 'https://player.glomex.com/integration/1/iframe-player.html'
    # The query parameters may come in any order; when a parameter is repeated
    # the named group keeps the value of its last occurrence
    _VALID_URL = r'''(?x)https?://player\.glomex\.com/integration/[^/]+/iframe-player\.html
        \?(?:(?:integrationId=(?P<integration>[^&#]+)|playlistId=(?P<id>[^&#]+)|[^&=#]+=[^&#]+)&?)+'''
    _TESTS = [{
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
        'info_dict': {
            'id': 'v-cfa6lye0dkdd-sf',
            'ext': 'mp4',
            'timestamp': 1635337199,
            'duration': 133080,
            'upload_date': '20211027',
            'description': 'md5:e741185fc309310ff5d0c789b437be66',
            'title': 'md5:35647293513a6c92363817a0fb0a7961',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
        'info_dict': {
            'id': 'rl-vcb49w1fb592p',
        },
        'playlist_count': 100,
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
        'info_dict': {
            'id': 'cl-bgqaata6aw8x',
        },
        'playlist_mincount': 2,
        'params': {
            'skip_download': True,
        },
    }]
    @classmethod
    def build_player_url(cls, video_id, integration, origin_url=None):
        """Build a player URL for video_id within the given integration.

        When origin_url is not None it is smuggled into the returned URL.
        """
        query_string = compat_urllib_parse_urlencode({
            'playlistId': video_id,
            'integrationId': integration,
        })
        player_url = '%s?%s' % (cls._BASE_PLAYER_URL, query_string)
        if origin_url is not None:
            player_url = cls._smuggle_origin_url(player_url, origin_url)
        return player_url
    @classmethod
    def _extract_urls(cls, webpage, origin_url):
        """Yield a player URL for each Glomex embed detected in webpage.

        Three integration styles are looked for: an iframe whose src is a
        player URL, a <glomex-player> tag (or a <div> marked with
        data-glomex-player="true") carrying data attributes, and inline
        scripts that assign string constants to integrationId/playlistId.
        origin_url is smuggled into every yielded URL.
        """
        # make the scheme in _VALID_URL optional
        _URL_RE = r'(?:https?:)?//' + cls._VALID_URL.split('://', 1)[1]
        # simplify the query string part of _VALID_URL; after extracting iframe
        # src, the URL will be matched again
        _URL_RE = _URL_RE.split(r'\?', 1)[0] + r'\?(?:(?!(?P=_q1)).)+'
        # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
        EMBED_RE = r'''(?x)
        (?:
            <iframe[^>]+?src=(?P<_q1>%(quot_re)s)
                (?P<url>%(url_re)s)(?P=_q1)|
            <(?P<html_tag>glomex-player|div)(?:
                data-integration-id=(?P<_q2>%(quot_re)s)(?P<integration_html>(?:(?!(?P=_q2)).)+)(?P=_q2)|
                data-playlist-id=(?P<_q3>%(quot_re)s)(?P<id_html>(?:(?!(?P=_q3)).)+)(?P=_q3)|
                data-glomex-player=(?P<_q4>%(quot_re)s)(?P<glomex_player>true)(?P=_q4)|
                [^>]*?
            )+>|
            # naive parsing of inline scripts for hard-coded integration parameters
            <(?P<script_tag>script)[^<]*?>(?:
                (?P<_stjs1>dataset\.)?integrationId\s*(?(_stjs1)=|:)\s*
                    (?P<_q5>%(quot_re)s)(?P<integration_js>(?:(?!(?P=_q5)).)+)(?P=_q5)\s*(?(_stjs1);|,)?|
                (?P<_stjs2>dataset\.)?playlistId\s*(?(_stjs2)=|:)\s*
                    (?P<_q6>%(quot_re)s)(?P<id_js>(?:(?!(?P=_q6)).)+)(?P=_q6)\s*(?(_stjs2);|,)?|
                (?:\s|.)*?
            )+</script>
        )
        ''' % {'quot_re': r'["\']', 'url_re': _URL_RE}
        for mobj in re.finditer(EMBED_RE, webpage):
            url = mobj.group('url')
            if url:
                # iframe embed: the src attribute is the player URL itself
                url = unescapeHTML(url)
                if url.startswith('//'):
                    # Scheme-relative src: borrow the scheme of the embedding
                    # page, defaulting to https when the origin URL is
                    # missing or carries no scheme of its own
                    scheme = (
                        compat_urllib_parse_urlparse(origin_url).scheme
                        if origin_url else None) or 'https'
                    url = '%s:%s' % (scheme, url)
                # _URL_RE above was simplified, so check the full pattern now
                if cls.suitable(url):
                    yield cls._smuggle_origin_url(url, origin_url)
                continue
            html_tag = mobj.group('html_tag')
            if html_tag:
                # a plain <div> only counts when flagged as a Glomex player
                if html_tag == 'div' and not mobj.group('glomex_player'):
                    continue
                video_id, integration = mobj.group(
                    'id_html', 'integration_html')
            elif mobj.group('script_tag'):
                video_id, integration = mobj.group('id_js', 'integration_js')
            else:
                continue
            # both parameters are needed to build a player URL
            if video_id and integration:
                yield cls.build_player_url(video_id, integration, origin_url)
    def _real_extract(self, url):
        """Extract the video or playlist addressed by a player URL.

        Raises ExtractorError when the URL does not match _VALID_URL or
        lacks the playlistId or integrationId query parameter.
        """
        url, origin_url = self._unsmuggle_origin_url(url)
        # should always match since the URL was already tested when selecting
        # the IE, but fail with a clear error rather than an AttributeError
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        # neither parameter is enforced by the pattern, so enforce them here
        video_id, integration = mobj.group('id', 'integration')
        if not video_id:
            raise ExtractorError(
                'No playlistId in URL: %s' % url, expected=True)
        if not integration:
            raise ExtractorError(
                'No integrationId in URL: %s' % url, expected=True)
        return self._download_and_extract_api_data(video_id, integration,
                                                   origin_url)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -9,6 +9,7 @@ import json
 import os.path
 import random
 import re
 import string
 import time
 import traceback
@ -67,6 +68,7 @@ from ..utils import (
 class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
@ -138,7 +140,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
-                1, [None, None, []], None, None, None, True
+                1, [None, None, []], None, None, None, True,
            ],
            username,
        ]
@ -160,7 +162,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
-                1, [None, None, []], None, None, None, True
+                1, [None, None, []], None, None, None, True,
            ]]
        challenge_results = req(
@ -213,7 +215,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
-                        [None, tfa_code, True, 2]
+                        [None, tfa_code, True, 2],
                    ]]
                tfa_results = req(
@ -284,7 +286,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
-            }
+            },
        },
    }
@ -385,7 +387,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
-                }
+                },
            },
            'query': query,
        }
@ -462,7 +464,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            #       (HTML, videodetails, metadata, renderers)
            'name': ('content', 'author', (('ownerChannelName', None), 'title'), ['text']),
            'url': ('href', 'ownerProfileUrl', 'vanityChannelUrl',
-                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl'])
+                    ['navigationEndpoint', 'browseEndpoint', 'canonicalBaseUrl']),
        }
        if any((videodetails, metadata, renderers)):
            result = (
@ -671,7 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
-                'title': 'UHDTV TEST 8K VIDEO.mp4'
+                'title': 'UHDTV TEST 8K VIDEO.mp4',
            },
            'params': {
                'youtube_include_dash_manifest': True,
@ -711,7 +713,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/@theamazingatheist',
                'title': 'Burning Everyone\'s Koran',
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
-            }
+            },
        },
        # Age-gated videos
        {
@ -839,7 +841,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            },
            'expected_warnings': [
                'DASH manifest missing',
-            ]
+            ],
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
@ -1820,8 +1822,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
-        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+        CPN_ALPHABET = string.ascii_letters + string.digits + '-_'
-        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
        # more consistent results setting it to right before the end
        qs = parse_qs(playback_url)
@ -1881,8 +1883,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
-        video_id = mobj.group(2)
+        return mobj.group(2)
        return video_id
    def _extract_chapters_from_json(self, data, video_id, duration):
        chapters_list = try_get(
@ -2035,7 +2036,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            headers = {
                'X-YouTube-Client-Name': '85',
                'X-YouTube-Client-Version': '2.0',
-                'Origin': 'https://www.youtube.com'
+                'Origin': 'https://www.youtube.com',
            }
            video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
@ -2064,8 +2065,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
        search_meta = (
-            lambda x: self._html_search_meta(x, webpage, default=None)) \
+            (lambda x: self._html_search_meta(x, webpage, default=None))
-            if webpage else lambda x: None
+            if webpage else lambda _: None)
        video_details = player_response.get('videoDetails') or {}
        microformat = try_get(
@ -2137,7 +2138,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        def build_fragments(f):
            return LazyList({
                'url': update_url_query(f['url'], {
-                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
+                    'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize'])),
                })
            } for range_start in range(0, f['filesize'], CHUNK_SIZE))
@ -2236,7 +2237,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    'protocol': 'http_dash_segments',
                    'fragments': build_fragments(dct),
                } if dct['filesize'] else {
-                    'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
+                    'downloader_options': {'http_chunk_size': CHUNK_SIZE},  # No longer useful?
                })
            formats.append(dct)
@ -2414,9 +2415,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            'is_live': is_live,
        }
-        pctr = try_get(
+        pctr = traverse_obj(
            player_response,
-            lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
+            ('captions', 'playerCaptionsTracklistRenderer', T(dict)))
        if pctr:
            def process_language(container, base_url, lang_code, query):
                lang_subs = []
@ -2430,9 +2431,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    })
                container[lang_code] = lang_subs
            def process_subtitles():
                subtitles = {}
-            for caption_track in (pctr.get('captionTracks') or []):
+                for caption_track in traverse_obj(pctr, (
-                base_url = caption_track.get('baseUrl')
+                        'captionTracks', lambda _, v: v.get('baseUrl'))):
                    base_url = self._yt_urljoin(caption_track['baseUrl'])
                    if not base_url:
                        continue
                    if caption_track.get('kind') != 'asr':
@ -2443,18 +2446,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            subtitles, base_url, lang_code, {})
                        continue
                    automatic_captions = {}
-                for translation_language in (pctr.get('translationLanguages') or []):
+                    for translation_language in traverse_obj(pctr, (
-                    translation_language_code = translation_language.get('languageCode')
+                            'translationLanguages', lambda _, v: v.get('languageCode'))):
-                    if not translation_language_code:
+                        translation_language_code = translation_language['languageCode']
                        continue
                        process_language(
                            automatic_captions, base_url, translation_language_code,
                            {'tlang': translation_language_code})
                    info['automatic_captions'] = automatic_captions
                info['subtitles'] = subtitles
            process_subtitles()
        parsed_url = compat_urllib_parse_urlparse(url)
-        for component in [parsed_url.fragment, parsed_url.query]:
+        for component in (parsed_url.fragment, parsed_url.query):
            query = compat_parse_qs(component)
            for k, v in query.items():
                for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
@ -2684,7 +2688,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'title': 'Super Cooper Shorts - Shorts',
            'uploader': 'Super Cooper Shorts',
            'uploader_id': '@SuperCooperShorts',
-        }
+        },
    }, {
        # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
        'url': 'https://www.youtube.com/@emergencyawesome/shorts',
@ -2738,7 +2742,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': '@ThirstForScience',
-        }
+        },
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
@ -3037,7 +3041,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'uploader': '3Blue1Brown',
            'uploader_id': '@3blue1brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
-        }
+        },
    }]
    @classmethod
@ -3335,7 +3339,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'client': {
                'clientName': 'WEB',
                'clientVersion': client_version,
-            }
+            },
        }
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
@ -3354,7 +3358,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                headers['x-goog-visitor-id'] = visitor_data
            data['continuation'] = continuation['continuation']
            data['clickTracking'] = {
-                'clickTrackingParams': continuation['itct']
+                'clickTrackingParams': continuation['itct'],
            }
            count = 0
            retries = 3
@ -3613,7 +3617,7 @@ class YoutubePlaylistIE(InfoExtractor):
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
-        }
+        },
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
@ -3623,7 +3627,7 @@ class YoutubePlaylistIE(InfoExtractor):
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
-        }
+        },
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
@ -3734,7 +3738,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
-        }
+        },
    }]
    def _get_n_results(self, query, n):
@ -3754,7 +3758,7 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
-        }
+        },
    }]
@ -3769,7 +3773,7 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
-        'params': {'playlistend': 5}
+        'params': {'playlistend': 5},
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
@ -3785,6 +3789,7 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
Author	SHA1	Message	Date
Zenon Mousmoulas	eabedc452f	Merge `9680022207` into `1036478d13`	2025-01-06 19:20:30 +02:00
dirkf	1036478d13	[YouTube] Ensure subtitle URLs are complete * WEB URLs are, MWEB not * resolves #33017	2025-01-06 01:39:04 +00:00
dirkf	00ad2b8ca1	[YouTube] Refactor subtitle processing * move to internal function * use `traverse_obj()`	2025-01-06 01:24:30 +00:00
dirkf	ab7c61ca29	[YouTube] Apply code style changes, trailing commas, etc	2025-01-06 01:22:16 +00:00
Zenon Mousmoulas	9680022207	Remove unused method	2021-12-13 00:08:02 +02:00
Zenon Mousmoulas	ae8fb74131	Fix typo url -> origin_url	2021-11-19 08:23:55 +02:00
Zenon Mousmoulas	699390c40d	Remove unnecessary quote escape	2021-11-13 08:47:38 +02:00
Zenon Mousmoulas	d303e1e05f	GlomexEmbedIE: Reuse _VALID_URL in _extract_urls * Let _extract_urls reuse _VALID_URL after making scheme optional and simplifying the query string part * Upon an iframe match * Add the scheme to the matched URL, if necessary * Match the URL against the full _VALID_URL	2021-11-11 11:16:29 +02:00
Zenon Mousmoulas	4225c46d3b	Revert to _VALID_URL to match video_id and integration * Retrieve the last instance of said parameters that appears in the query string, rather than the first previously * Resolve the respective comment in #30212	2021-11-11 11:07:56 +02:00
Zenon Mousmoulas	abfc16a123	Regex fixup	2021-11-11 08:30:56 +02:00
Zenon Mousmoulas	6880bf4334	Force evaluation	2021-11-10 07:34:16 +02:00
Zenon Mousmoulas	f561e0d817	Add Glomex IEs * Add new IEs * GlomexBaseIE: Base IE class * GlomexIE: Extract videos from video.glomex.com (by deferring to glomex:embed) * GlomexEmbedIE: Extract Glomex videos by matching the player URL * Query the API to extract metadata, detect video formats and get the respective (JWT protected) stream/source URLs * The API query may return one or more videos: the latter case is treated as a playlist * As this is otherwise identically handled, a separate IE was not deemed necessary * However title and description fields are not set for playlist results * They do not exist in the parent object; obtaining them from the first entry is not indicative for the playlist content * As the playlist order is not always stable (this is true at least for related videos playlists), it makes writing test cases impossible * Let GenericIE detect embeds by matching all three integration methods: * HTML: glomex-player tag or data attributes * Javascript: naive parsing of inline scripts for string constants assigned to integration parameters * Iframe: src attribute GlomexEmbedIE._VALID_URL * Let GlomexIE and the former embed detection pass the origin URL to GlomexEmbedIE by smuggling it in the player URL, as this is an expected parameter in API requests * Add test cases for both single videos and two playlist flavors	2021-11-08 08:17:47 +02:00