Merge 72db2172897a76665414fc9da3fc79f096df9fab into 2b4fbfce25902d557b86b003cf48f738129efce4

[YouTube] Support player 4fcd6e4a
thx seproDev, bashonly: yt-dlp/yt-dlp#12748
2025-07-13 15:04:14 +09:00 · 2025-03-26 07:07:56 +00:00 · 2025-03-26 02:27:25 +00:00 · 2025-03-25 22:35:06 +00:00 · 2025-03-25 22:35:06 +00:00 · 2025-03-25 22:35:06 +00:00
7 changed files with 381 additions and 227 deletions
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -84,6 +84,21 @@ _SIG_TESTS = [
        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
        '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
    ),
+    (
+        'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
+    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+    ),
 ]

 _NSIG_TESTS = [
@ -153,7 +168,7 @@ _NSIG_TESTS = [
    ),
    (
        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
-        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+        'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
    ),
    (
        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
@ -173,7 +188,7 @@ _NSIG_TESTS = [
    ),
    (
        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
-        'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
+        'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
    ),
    (
        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
@ -231,10 +246,6 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
        'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
    ),
-    (
-        'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
-        'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
-    ),
    (
        'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
        'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
@ -259,6 +270,22 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
        'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
    ),
+    (
+        'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
+        'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
+    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
+        'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
+        'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
+    ),
 ]


@ -271,6 +298,8 @@ class TestPlayerInfo(unittest.TestCase):
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
+            ('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
            # obsolete
            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@ -280,8 +309,9 @@ class TestPlayerInfo(unittest.TestCase):
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
        )
+        ie = YoutubeIE(FakeYDL({'cachedir': False}))
        for player_url, expected_player_id in PLAYER_URLS:
-            player_id = YoutubeIE._extract_player_info(player_url)
+            player_id = ie._extract_player_info(player_url)
            self.assertEqual(player_id, expected_player_id)


@ -301,8 +331,8 @@ class TestSignature(unittest.TestCase):
 def t_factory(name, sig_func, url_pattern):
    def make_tfunc(url, sig_input, expected_sig):
        m = url_pattern.match(url)
-        assert m, '%r should follow URL format' % url
-        test_id = m.group('id')
+        assert m, '{0!r} should follow URL format'.format(url)
+        test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))

        def test_func(self):
            basename = 'player-{0}-{1}.js'.format(name, test_id)
@ -335,12 +365,16 @@ def n_sig(jscode, sig_input):


 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
+    'signature', signature,
+    re.compile(r'''(?x)
+        .+/(?P<h5>html5)?player(?(h5)(?:-en_US)?-|/)(?P<id>[a-zA-Z0-9/._-]+)
+        (?(h5)/(?:watch_as3|html5player))?\.js$
+    '''))
 for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

 make_nsig_test = t_factory(
-    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
+    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
 for test_spec in _NSIG_TESTS:
    make_nsig_test(*test_spec)

--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -413,8 +413,6 @@ from .foxnews import (
    FoxNewsArticleIE,
 )
 from .foxsports import FoxSportsIE
-from .franceculture import FranceCultureIE
-from .franceinter import FranceInterIE
 from .francetv import (
    FranceTVIE,
    FranceTVSiteIE,
@ -1011,7 +1009,11 @@ from .radiocanada import (
 from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
+from .radiofrance import (
+    RadioFrancePodcastEpisodeIE,
+    RadioFrancePodcastPlaylistIE,
+    RadioFranceWebradioIE,
+)
 from .rai import (
    RaiPlayIE,
    RaiPlayLiveIE,
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@ -1,73 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    extract_attributes,
-    int_or_none,
-)
-
-
-class FranceCultureIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
-        'info_dict': {
-            'id': 'rendez-vous-au-pays-des-geeks',
-            'display_id': 'rendez-vous-au-pays-des-geeks',
-            'ext': 'mp3',
-            'title': 'Rendez-vous au pays des geeks',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20140301',
-            'timestamp': 1393700400,
-            'vcodec': 'none',
-        }
-    }, {
-        # no thumbnail
-        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        video_data = extract_attributes(self._search_regex(
-            r'''(?sx)
-                (?:
-                    </h1>|
-                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
-                ).*?
-                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
-            ''',
-            webpage, 'video data'))
-
-        video_url = video_data.get('data-url') or video_data['data-asset-source']
-        title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
-
-        description = self._html_search_regex(
-            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
-            webpage, 'description', default=None)
-        thumbnail = self._search_regex(
-            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
-            webpage, 'thumbnail', default=None)
-        uploader = self._html_search_regex(
-            r'(?s)<span class="author">(.*?)</span>',
-            webpage, 'uploader', default=None)
-        ext = determine_ext(video_url.lower())
-
-        return {
-            'id': display_id,
-            'display_id': display_id,
-            'url': video_url,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'ext': ext,
-            'vcodec': 'none' if ext == 'mp3' else None,
-            'uploader': uploader,
-            'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
-            'duration': int_or_none(video_data.get('data-duration')),
-        }
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@ -1,59 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import month_by_name
-
-
-class FranceInterIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
-
-    _TEST = {
-        'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
-        'md5': '9e54d7bdb6fdc02a841007f8a975c094',
-        'info_dict': {
-            'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
-            'ext': 'mp3',
-            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
-            'description': 'md5:401969c5d318c061f86bda1fa359292b',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'upload_date': '20160907',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        video_url = self._search_regex(
-            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'video url', group='url')
-
-        title = self._og_search_title(webpage)
-        description = self._og_search_description(webpage)
-        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
-
-        upload_date_str = self._search_regex(
-            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
-            webpage, 'upload date', fatal=False)
-        if upload_date_str:
-            upload_date_list = upload_date_str.split()
-            upload_date_list.reverse()
-            upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
-            upload_date_list[2] = '%02d' % int(upload_date_list[2])
-            upload_date = ''.join(upload_date_list)
-        else:
-            upload_date = None
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'formats': [{
-                'url': video_url,
-                'vcodec': 'none',
-            }],
-        }
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@ -4,56 +4,284 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    get_element_by_attribute,
+    int_or_none,
+    parse_iso8601,
+    strip_or_none,
+    url_or_none
+)


-class RadioFranceIE(InfoExtractor):
-    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
-    IE_NAME = 'radiofrance'
+class RadioFranceBaseIE(InfoExtractor):
+    _BASE_URL = r'https://www.radiofrance.fr/'

-    _TEST = {
-        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
-        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+    def extract_api_data(self, api_path, id, html):
+        """Return the API payload embedded in `html` for `api_path` ('path' or 'stations'); ExtractorError if empty or malformed."""
+        # `.*?`, not `.*`: with DOTALL a greedy match would run on to the page's last </script>
+        pattern = r'<script [^>]*sveltekit:data-url="https://www\.radiofrance\.fr/api/v[\d.]+/%s[^>]*>(?P<json>.*?)</script>' % api_path
+        json = self._search_regex(pattern, html, 'API data', flags=re.DOTALL, group='json')
+        if not json:
+            raise ExtractorError('%s: JSON data not found' % id)
+
+        try:
+            json = self._parse_json(json, id)
+            json = self._parse_json(json['body'], id)  # 'body' is itself parsed as a JSON document
+            if api_path == 'path':
+                return json['content']
+            elif api_path == 'stations':
+                return json
+            else:
+                raise ExtractorError('Coding error')
+        except KeyError:
+            raise ExtractorError('%s: Invalid JSON' % id)
+
+    def get_title(self, api_data, webpage=None):  # stripped api_data['title']; page lookups are the fallback
+        title = strip_or_none(api_data.get('title'))
+        if not title and webpage:  # NOTE(review): confirm get_element_by_attribute('h1', None, ...) selects the <h1>
+            title = strip_or_none(get_element_by_attribute('h1', None, webpage, False)) or strip_or_none(self._og_search_title(webpage))
+        return title
+
+    def get_description(self, api_data, webpage=None):  # stripped api_data['standFirst'], else taken from webpage
+        description = strip_or_none(api_data.get('standFirst'))
+        if not description and webpage:  # the fallback only runs when a page was supplied
+            description = strip_or_none(self._og_search_description(webpage))
+        return description
+
+    def get_thumbnail(self, api_data, webpage=None):
+        """Return the URL in api_data['visual']['src'], else _og_search_thumbnail(webpage) when a page is given."""
+        visual = api_data.get('visual')
+        thumbnail = url_or_none(visual.get('src')) if visual else None
+        if thumbnail or not webpage:
+            return thumbnail
+        # nothing usable in the API data: fall back to the supplied page
+        return self._og_search_thumbnail(webpage)
+
+    def get_timestamp(self, api_data, webpage=None):  # api_data['publishedDate'] unchanged, else from page metadata
+        timestamp = api_data.get('publishedDate')
+        if not timestamp and webpage:  # fallback: 'article:published_time' meta value run through parse_iso8601
+            timestamp = parse_iso8601(self._html_search_meta('article:published_time', webpage, 'publication time', ))
+        return timestamp
+
+    def get_brand(self, api_data, webpage=None):  # stripped api_data['brand'], else the page's 'site_name' property
+        brand = strip_or_none(api_data.get('brand'))
+        if not brand and webpage:  # the fallback lookup is requested with fatal=False
+            brand = self._og_search_property('site_name', webpage, 'Station name', fatal=False)
+        return brand
+
+    def extract_episode(self, episode_id, api_data):
+        """Return (url, duration) from the first entry of api_data['manifestations'], or (None, None) if there is none."""
+        manifestations = api_data.get('manifestations')
+        if not manifestations:
+            return None, None
+        first = manifestations[0]
+        # .get(): an entry without 'url' gives url=None (callers test for that) instead of a KeyError
+        return url_or_none(first.get('url')), int_or_none(first.get('duration'))
+
+    def get_playlist_entries(self, playlist_url, playlist_id, api_data, direction):
+        """Return entry dicts for this page of api_data['expressions'] plus the pages before/after it per `direction`."""
+        playlist_data = api_data['expressions']
+
+        entries = []
+        for item in playlist_data.get('items') or []:  # a page without 'items' simply adds no entries
+            episode_path = item.get('path')
+            if episode_path is None:
+                self.report_warning('No path found for episode "%s"' % item.get('title'))
+                continue
+            episode_id = RadioFrancePodcastEpisodeIE._match_id(self._BASE_URL + episode_path)
+            if episode_id is None:
+                self.report_warning('Could not parse id of episode from path: "%s"' % episode_path)
+                continue
+            episode_url, duration = self.extract_episode(episode_id, item)
+            if episode_url is None:
+                self.to_screen('Episode "%s" is not available' % episode_path)
+                continue
+            entries.append({
+                'id': episode_id,
+                'url': episode_url,
+                'title': self.get_title(item),
+                'description': self.get_description(item),
+                'timestamp': self.get_timestamp(item),
+                'thumbnail': self.get_thumbnail(item),
+                'duration': duration,
+            })
+
+        page_number = int_or_none(playlist_data.get('pageNumber'))
+        if page_number:
+            # 'both' fans out from the starting page; the recursive calls pass 'prev'/'next' so each walks one way only
+            if direction in ['both', 'prev'] and playlist_data.get('prev') is not None:
+                webpage, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number - 1)
+                entries = self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='prev') + entries
+            if direction in ['both', 'next'] and playlist_data.get('next') is not None:
+                webpage, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number + 1)
+                entries = entries + self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='next')
+
+        return entries
+
+    def get_data(self, url, api_path, id, page=None):
+        """Download `url` (with p=<page> in the query when `page` is truthy) and return (webpage, embedded API data)."""
+        query, note = {}, None
+        if page:
+            query['p'] = page
+            note = "Downloading page %i" % page
+        webpage = self._download_webpage(url, id, query=query, note=note)
+        # the API data is parsed out of the page just downloaded
+        return webpage, self.extract_api_data(api_path, id, webpage)
+
+
+class RadioFrancePodcastEpisodeIE(RadioFranceBaseIE):
+    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/.+/.+-(?P<id>\d+)$'
+
+    _TESTS = [{
+        'note': 'Podcast episode with audio from France Info',
+        'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-brief-eco/le-brief-eco-du-lundi-05-septembre-2022-8310713',
        'info_dict': {
-            'id': 'one-one',
-            'ext': 'ogg',
-            'title': 'One to one',
-            'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
-            'uploader': 'Thomas Hercouët',
-        },
-    }
+            'id': '8310713',
+            'ext': 'mp3',
+            'url': r're:^https?://.*\.mp3$',
+            'title': 'Pour la première fois en vingt ans, l’euro passe sous les 0,99\u00a0dollar',
+            'description': str,
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': int,
+            'duration': int,
+            'upload_date': str
+        }
+    }, {
+        'note': 'Podcast episode from France Musique',
+        'url': 'https://www.radiofrance.fr/francemusique/podcasts/allegretto/lever-du-jour-9233228',
+        'only_matching': True
+    }, {
+        'note': 'Podcast episode from FranceInter',
+        'url': 'https://www.radiofrance.fr/franceinter/podcasts/rendez-vous-avec-x/un-mysterieux-echange-digne-de-la-guerre-froide-9343281',
+        'only_matching': True
+    }, {
+        'note': 'Podcast episode from France Culture',
+        'url': 'https://www.radiofrance.fr/franceculture/podcasts/la-science-cqfd/teotihuacan-la-plus-mysterieuse-des-cites-d-or-9224610',
+        'only_matching': True
+    }, {
+        'note': 'Podcast episode from Le Mouv',
+        'url': 'https://www.radiofrance.fr/mouv/podcasts/mouv-dj-la-caution/ncr2a-ne-cherche-rien-d-autre-ailleurs-1197950',
+        'only_matching': True
+    }, {
+        'note': 'Podcast episode from FIP',
+        'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip/hommage-au-cinema-de-vangelis-4734742',
+        'only_matching': True
+    }]

    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
-        description = self._html_search_regex(
-            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
-            webpage, 'description', fatal=False)
-        uploader = self._html_search_regex(
-            r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
-            webpage, 'uploader', fatal=False)
-
-        formats_str = self._html_search_regex(
-            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
-            webpage, 'audio URLs')
-        formats = [
-            {
-                'format_id': fm[0],
-                'url': fm[1],
-                'vcodec': 'none',
-                'preference': i,
-            }
-            for i, fm in
-            enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
-        ]
-        self._sort_formats(formats)
+        id = self._match_id(url)
+        webpage, api_data = self.get_data(url, 'path', id)
+        url, duration = self.extract_episode(id, api_data)
+        if url is None:
+            msg = 'Podcast file is not available. If the show is too recent, the file may not have been uploaded yet: try again later.'
+            raise ExtractorError(msg, expected=True, video_id=id)

        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'description': description,
-            'uploader': uploader,
+            'id': id,
+            'url': url,
+            'title': self.get_title(api_data, webpage),
+            'description': self.get_description(api_data, webpage),
+            'timestamp': self.get_timestamp(api_data, webpage),
+            'thumbnail': self.get_thumbnail(api_data, webpage),
+            'channel_id': self.get_brand(api_data, webpage),
+            'duration': duration,
+        }
+
+
+class RadioFrancePodcastPlaylistIE(RadioFranceBaseIE):
+    """Extractor for podcast show pages matched by _VALID_URL; returns the show's episodes as a playlist."""
+    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/(?P<id>[^/]+?)(?:[?#].*)?$'
+    _TESTS = [{
+        'note': 'Podcast show with multiple pages of episodes and some of them are missing',
+        'url': 'https://www.radiofrance.fr/franceculture/podcasts/une-semaine-dans-le-monde-10-11?p=2',
+        'info_dict': {
+            'id': 'une-semaine-dans-le-monde-10-11',
+            'title': 'Une semaine dans le monde | 10-11',
+            'description': str,
+            'timestamp': int
+        },
+        'playlist_count': 23,
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage, api_data = self.get_data(url, 'path', id)
+        # direction='both': gather the pages before and after the one `url` points at as well
+        entries = self.get_playlist_entries(url, id, api_data, direction='both')
+        entries.reverse()  # NOTE(review): reason for reversing is not visible here — confirm the intended order
+
+        return {
+            'id': id,
+            '_type': 'playlist',
+            'entries': entries,
+            'title': self.get_title(api_data, webpage),
+            'description': self.get_description(api_data, webpage),
+            'timestamp': self.get_timestamp(api_data, webpage),
+            'thumbnail': self.get_thumbnail(api_data, webpage),
+            'channel_id': self.get_brand(api_data, webpage),
+        }
+
+
+class RadioFranceWebradioIE(RadioFranceBaseIE):
+    _VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/(?P<id>radio-[^/]+)$'
+
+    _TESTS = [{
+        'note': 'Full list of webradios available at https://www.radiofrance.fr/ecouter-musique',
+        'url': 'https://www.radiofrance.fr/fip/radio-metal',
+        'info_dict': {
+            'id': 'radio-metal',
+            'ext': 'aac',
+            'title': str,
+        },
+        'params': {
+            'format': 'aac',
+            'skip_download': True,
+        }
+    }]
+
+    def get_livestream_formats(self, id, api_data):
+        """Build the format list from api_data['media']['sources'].
+
+        Sources without a 'url' are skipped; ExtractorError is raised when none is left.
+        """
+        formats = []
+        for source in api_data['media']['sources']:
+            stream_url = source.get('url')
+            if not stream_url:
+                continue
+
+            kind = source.get('format')
+            fmt = {
+                'url': stream_url,
+                'format_id': kind,
+                'asr': 48000,
+                'vcodec': 'none'
+            }
+            if kind in ('mp3', 'aac'):
+                # plain audio streams: 'mp3' gets preference 1, 'aac' preference 2
+                fmt.update({
+                    'preference': 1 if kind == 'mp3' else 2,
+                    'acodec': kind,
+                    'abr': source.get('bitrate'),
+                })
+            elif kind == 'hls':
+                fmt.update({'preference': 0, 'manifest_url': stream_url})
+            formats.append(fmt)
+
+        if not formats:
+            raise ExtractorError('No live streaming URL found')
+        return formats
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage, api_data = self.get_data(url, 'stations', id)
+
+        return {
+            'id': id,
+            'title': self.get_title(api_data, webpage),
+            'formats': self.get_livestream_formats(id, api_data),
+            'thumbnail': self.get_thumbnail(api_data, webpage),
+            'channel_id': self.get_brand(api_data, webpage),
+            'is_live': True
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -692,9 +692,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    _PLAYER_INFO_RE = (
-        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
-        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
-        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
+        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
+        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
+        r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
    )
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

@ -1626,15 +1626,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        """ Return a string representation of a signature """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

-    @classmethod
-    def _extract_player_info(cls, player_url):
-        for player_re in cls._PLAYER_INFO_RE:
-            id_m = re.search(player_re, player_url)
-            if id_m:
-                break
-        else:
-            raise ExtractorError('Cannot identify player %r' % player_url)
-        return id_m.group('id')
+    def _extract_player_info(self, player_url):
+        """Return the 'id' group that _PLAYER_INFO_RE finds in player_url; raise ExtractorError if there is none."""
+        try:
+            return self._search_regex(self._PLAYER_INFO_RE, player_url, 'player info', group='id')
+        except ExtractorError as err:
+            # re-raise with the offending URL in the message, keeping the original error as the cause
+            raise ExtractorError('Cannot identify player %r' % (player_url,), cause=err)

    def _load_player(self, video_id, player_url, fatal=True, player_id=None):
        if not player_id:
@ -1711,6 +1709,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)

+    def _extract_sig_fn(self, jsi, funcname):
+        """Return jsi's function code for `funcname`, with a matched global `var` declaration prepended to its body."""
+        var_ay = self._search_regex(
+            r'''(?x)
+                (?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
+                    (var\s*[\w$]+\s*=\s*(?:
+                        ('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
+                        \[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
+                    ))(?=\s*[,;])
+            ''', jsi.code, 'useful values', default='')
+        # accepted shapes: `var x = "...".split("<sep>")` or `var x = [<string literals>]`, directly after
+        # a single-quoted string statement (e.g. 'use strict';); default='' means "not found" is not an error
+        sig_fn = jsi.extract_function_code(funcname)
+        if var_ay:
+            sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))  # declaration goes in front of element 1
+        return sig_fn
+
    def _parse_sig_js(self, jscode):
        # Examples where `sig` is funcname:
        # sig=function(a){a=a.split(""); ... ;return a.join("")};
@ -1736,8 +1751,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
-        initial_function = jsi.extract_function(funcname)
-        return lambda s: initial_function([s])
+
+        initial_function = self._extract_sig_fn(jsi, funcname)
+
+        func = jsi.extract_function_from_code(*initial_function)
+
+        return lambda s: func([s])

    def _cached(self, func, *cache_id):
        def inner(*args, **kwargs):
@ -1856,15 +1875,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):

-        var_ay = self._search_regex(
-            r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
-            jsi.code, 'useful values', default='')
-
        func_name = self._extract_n_function_name(jsi.code)

-        func_code = jsi.extract_function_code(func_name)
-        if var_ay:
-            func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
+        func_code = self._extract_sig_fn(jsi, func_name)

        if player_id:
            self.cache.store('youtube-nsig', player_id, func_code)
@ -2136,7 +2149,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    video_details = merge_dicts(*traverse_obj(
                        (player_response, api_player_response),
                        (Ellipsis, 'videoDetails', T(dict))))
-                    player_response.update(api_player_response or {})
+                    player_response.update(filter_dict(
+                        api_player_response or {}, cndn=lambda k, _: k != 'captions'))
                    player_response['videoDetails'] = video_details

        def is_agegated(playability):
@ -2566,8 +2580,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        }

        pctr = traverse_obj(
-            player_response,
-            ('captions', 'playerCaptionsTracklistRenderer', T(dict)))
+            (player_response, api_player_response),
+            (Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
        if pctr:
            def process_language(container, base_url, lang_code, query):
                lang_subs = []
@ -2584,20 +2598,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            def process_subtitles():
                subtitles = {}
                for caption_track in traverse_obj(pctr, (
-                        'captionTracks', lambda _, v: v.get('baseUrl'))):
+                        Ellipsis, 'captionTracks', lambda _, v: (
+                            v.get('baseUrl') and v.get('languageCode')))):
                    base_url = self._yt_urljoin(caption_track['baseUrl'])
                    if not base_url:
                        continue
+                    lang_code = caption_track['languageCode']
                    if caption_track.get('kind') != 'asr':
-                        lang_code = caption_track.get('languageCode')
-                        if not lang_code:
-                            continue
                        process_language(
                            subtitles, base_url, lang_code, {})
                        continue
                    automatic_captions = {}
+                    process_language(
+                        automatic_captions, base_url, lang_code, {})
                    for translation_language in traverse_obj(pctr, (
-                            'translationLanguages', lambda _, v: v.get('languageCode'))):
+                            Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
                        translation_language_code = translation_language['languageCode']
                        process_language(
                            automatic_captions, base_url, translation_language_code,
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -678,7 +678,7 @@ class JSInterpreter(object):
            return len(obj)
        try:
            return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
-        except (TypeError, KeyError, IndexError) as e:
+        except (TypeError, KeyError, IndexError, ValueError) as e:
            # allow_undefined is None gives correct behaviour
            if allow_undefined or (
                    allow_undefined is None and not isinstance(e, TypeError)):
@ -1038,6 +1038,10 @@ class JSInterpreter(object):
                    left_val = self._index(left_val, idx)
            if isinstance(idx, float):
                idx = int(idx)
+            if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
+                # JS Array is a sparsely assignable list
+                # TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
+                left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
            left_val[idx] = self._operator(
                m.group('op'), self._index(left_val, idx) if m.group('op') else None,
                m.group('expr'), expr, local_vars, allow_recursion)
@ -1204,9 +1208,10 @@ class JSInterpreter(object):
                elif member == 'join':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(len(argvals) <= 1, 'takes at most one argument')
-                    return (',' if len(argvals) == 0 else argvals[0]).join(
-                        ('' if x in (None, JS_Undefined) else _js_toString(x))
-                        for x in obj)
+                    return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
+                            else argvals[0]).join(
+                                ('' if x in (None, JS_Undefined) else _js_toString(x))
+                                for x in obj)
                elif member == 'reverse':
                    assertion(not argvals, 'does not take any arguments')
                    obj.reverse()
@ -1364,19 +1369,21 @@ class JSInterpreter(object):
        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
        return self.build_arglist(func_m.group('args')), code

-    def extract_function(self, funcname):
+    def extract_function(self, funcname, *global_stack):
        return function_with_repr(
-            self.extract_function_from_code(*self.extract_function_code(funcname)),
+            self.extract_function_from_code(*itertools.chain(
+                self.extract_function_code(funcname), global_stack)),
            'F<%s>' % (funcname,))

    def extract_function_from_code(self, argnames, code, *global_stack):
        local_vars = {}

+        start = None
        while True:
-            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
            if mobj is None:
                break
-            start, body_start = mobj.span()
+            start, body_start = ((start or 0) + x for x in mobj.span())
            body, remaining = self._separate_at_paren(code[body_start - 1:])
            name = self._named_object(local_vars, self.extract_function_from_code(
                [x.strip() for x in mobj.group('args').split(',')],
Author	SHA1	Message	Date
Olivier Trichet	86195979fb	Merge 72db2172897a76665414fc9da3fc79f096df9fab into 2b4fbfce25902d557b86b003cf48f738129efce4	2025-03-26 07:07:56 +00:00
dirkf	2b4fbfce25	[YouTube] Support player `4fcd6e4a` thx seproDev, bashonly: yt-dlp/yt-dlp#12748	2025-03-26 02:27:25 +00:00
dirkf	1bc45b8b6c	[JSInterp] Use `,` for join() with null/undefined argument Eg: [1,2,3].join(null) -> '1,2,3'	2025-03-25 22:35:06 +00:00
dirkf	b982d77d0b	[YouTube] Align signature tests with yt-dlp thx bashonly, yt-dlp/yt-dlp#12725	2025-03-25 22:35:06 +00:00
dirkf	c55dbf4838	[YouTube] Update signature extraction for players `643afba4`, `363db69b`	2025-03-25 22:35:06 +00:00
dirkf	087d865230	[YouTube] Support new player URL patterns	2025-03-25 22:35:06 +00:00
dirkf	a4fc1151f1	[JSInterp] Improve indexing * catch invalid list index with `ValueError` (eg [1, 2]['ab'] -> undefined) * allow assignment outside existing list (eg var l = [1,2]; l[9] = 0;)	2025-03-25 22:35:05 +00:00
dirkf	a464c159e6	[YouTube] Make `_extract_player_info()` use `_search_regex()`	2025-03-25 22:35:05 +00:00
dirkf	7dca08eff0	[YouTube] Also get original of translated automatic captions	2025-03-25 22:35:05 +00:00
dirkf	2239ee7965	[YouTube] Get subtitles/automatic captions from both web and API responses	2025-03-25 22:35:05 +00:00
Olivier Trichet	72db217289	[RadioFrance] Extractor for thematic webradios	2022-12-22 14:22:19 -05:00
Olivier Trichet	fc933e686b	[RadioFrance] Refactoring	2022-12-22 13:01:10 -05:00
Olivier Trichet	ea02c40539	[RadioFrance] Extractor for podcast playlists	2022-12-22 13:00:54 -05:00
Olivier Trichet	7270ecf3d6	[RadioFrance] Extractor for podcast of Radio France stations	2022-12-22 13:00:17 -05:00
Olivier Trichet	dade9111f1	[RadioFrance] Remove old Radio France stations extractors These are not working anymore after their respective websites were merged into www.radiofrance.fr.	2022-12-22 13:00:08 -05:00