Merge d4664a53467eade879971de3950424299f68bd3a into 2b4fbfce25902d557b86b003cf48f738129efce4

[YouTube] Support player 4fcd6e4a
thx seproDev, bashonly: yt-dlp/yt-dlp#12748
2025-07-13 06:54:15 +09:00 · 2025-03-26 08:12:20 +00:00 · 2025-03-26 02:27:25 +00:00 · 2025-03-25 22:35:06 +00:00 · 2025-03-25 22:35:06 +00:00 · 2025-03-25 22:35:06 +00:00
8 changed files with 1050 additions and 51 deletions
--- a/3
+++ b/3
@ -246,4 +246,5 @@ Enes Solak
 Nathan Rossi
 Thomas van der Berg
 Luca Cherubin
-Adrian Heine
+Adrian Heine
+Henrik Heimbuerger
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -84,6 +84,21 @@ _SIG_TESTS = [
        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
        '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
    ),
+    (
+        'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
+    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        '0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+    ),
 ]

 _NSIG_TESTS = [
@ -153,7 +168,7 @@ _NSIG_TESTS = [
    ),
    (
        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
-        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+        'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
    ),
    (
        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
@ -173,7 +188,7 @@ _NSIG_TESTS = [
    ),
    (
        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
-        'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
+        'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
    ),
    (
        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
@ -231,10 +246,6 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
        'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
    ),
-    (
-        'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
-        'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
-    ),
    (
        'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
        'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
@ -259,6 +270,22 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
        'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
    ),
+    (
+        'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
+        'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
+    ),
+    (
+        'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
+        'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
+        'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
+    ),
+    (
+        'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
+        'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
+    ),
 ]


@ -271,6 +298,8 @@ class TestPlayerInfo(unittest.TestCase):
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
+            ('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
            # obsolete
            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@ -280,8 +309,9 @@ class TestPlayerInfo(unittest.TestCase):
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
        )
+        ie = YoutubeIE(FakeYDL({'cachedir': False}))
        for player_url, expected_player_id in PLAYER_URLS:
-            player_id = YoutubeIE._extract_player_info(player_url)
+            player_id = ie._extract_player_info(player_url)
            self.assertEqual(player_id, expected_player_id)


@ -301,8 +331,8 @@ class TestSignature(unittest.TestCase):
 def t_factory(name, sig_func, url_pattern):
    def make_tfunc(url, sig_input, expected_sig):
        m = url_pattern.match(url)
-        assert m, '%r should follow URL format' % url
-        test_id = m.group('id')
+        assert m, '{0!r} should follow URL format'.format(url)
+        test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))

        def test_func(self):
            basename = 'player-{0}-{1}.js'.format(name, test_id)
@ -335,12 +365,16 @@ def n_sig(jscode, sig_input):


 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
+    'signature', signature,
+    re.compile(r'''(?x)
+        .+/(?P<h5>html5)?player(?(h5)(?:-en_US)?-|/)(?P<id>[a-zA-Z0-9/._-]+)
+        (?(h5)/(?:watch_as3|html5player))?\.js$
+    '''))
 for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

 make_nsig_test = t_factory(
-    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
+    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
 for test_spec in _NSIG_TESTS:
    make_nsig_test(*test_spec)

--- a/youtube_dl/extractor/art19.py
+++ b/youtube_dl/extractor/art19.py
@ -0,0 +1,315 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    merge_dicts,
+    parse_iso8601,
+    str_or_none,
+    T,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class Art19IE(InfoExtractor):
+    _UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
+    _VALID_URL = (
+        r'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{0})'.format(_UUID_REGEX),
+        r'https?://rss\.art19\.com/episodes/(?P<id>{0})\.mp3'.format(_UUID_REGEX),
+    )
+    _EMBED_REGEX = (r'<iframe\b[^>]+\bsrc\s*=\s*[\'"](?P<url>{0})'.format(_VALID_URL[0]),)
+
+    _TESTS = [{
+        'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
+        'info_dict': {
+            'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
+            'ext': 'mp3',
+            'title': 'Why Did DeSantis Drop Out?',
+            'series': 'The Daily Briefing',
+            'release_timestamp': 1705941275,
+            'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
+            'episode': 'Episode 582',
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
+            'upload_date': '20240122',
+            'timestamp': 1705940815,
+            'episode_number': 582,
+            # 'modified_date': '20240122',
+            'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
+            'modified_timestamp': int,
+            'release_date': '20240122',
+            'duration': 527.4,
+        },
+    }, {
+        'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
+        'info_dict': {
+            'id': '8319b776-4153-4d22-8630-631f204a03dd',
+            'ext': 'mp3',
+            'title': 'Martha Stewart: The Homemaker Hustler Part 2',
+            # 'modified_date': '20240116',
+            'upload_date': '20240105',
+            'modified_timestamp': int,
+            'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
+            'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'description': r're:(?s)In the summer of 2003, Martha Stewart is indicted .{695}#do-not-sell-my-info\.$',
+            'release_timestamp': 1705305660,
+            'release_date': '20240115',
+            'timestamp': 1704481536,
+            'episode_number': 88,
+            'series': 'Scamfluencers',
+            'duration': 2588.37501,
+            'episode': 'Episode 88',
+        },
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
+        'info_dict': {
+            'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
+            'ext': 'mp3',
+            'title': "'Verstappen wordt een synoniem voor Formule 1'",
+            'season': 'Seizoen 6',
+            'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
+            'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
+            'duration': 3061.82111,
+            'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
+            'release_date': '20231126',
+            'modified_timestamp': 1701156004,
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'season_number': 6,
+            'episode_number': 52,
+            # 'modified_date': '20231128',
+            'upload_date': '20231126',
+            'timestamp': 1701025981,
+            'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
+            'series': 'De Boordradio',
+            'release_timestamp': 1701026308,
+            'episode': 'Episode 52',
+        },
+    }, {
+        'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
+        'info_dict': {
+            'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
+            'ext': 'mp3',
+            'title': 'Larry Bucshon announces retirement from congress',
+            'upload_date': '20240115',
+            'episode_number': 148,
+            'episode': 'Episode 148',
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'release_date': '20240115',
+            'timestamp': 1705328205,
+            'release_timestamp': 1705329275,
+            'series': 'All INdiana Politics',
+            # 'modified_date': '20240117',
+            'modified_timestamp': 1705458901,
+            'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
+            'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
+            'description': 'md5:53b5239e4d14973a87125c217c255b2a',
+            'duration': 1256.18848,
+        },
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        for from_ in super(Art19IE, cls)._extract_embed_urls(url, webpage):
+            yield from_
+        for episode_id in re.findall(
+                r'<div\b[^>]+\bclass\s*=\s*[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({0})[\'"]'.format(cls._UUID_REGEX), webpage):
+            yield 'https://rss.art19.com/episodes/{0}.mp3'.format(episode_id)
+
+    def _real_extract(self, url):
+        """Extract a single Art19 episode.
+
+        Requests two JSON documents keyed by the episode UUID matched from
+        `url` (player metadata from art19.com, RSS metadata from
+        rss.art19.com), builds the list of audio formats and merges the
+        metadata from both documents into the returned info dict.
+        """
+        episode_id = self._match_id(url)
+
+        # Both requests pass fatal=False; presumably a failed download then
+        # yields a falsy value instead of raising -- confirm in InfoExtractor
+        player_metadata = self._download_json(
+            'https://art19.com/episodes/{0}'.format(episode_id), episode_id,
+            note='Downloading player metadata', fatal=False,
+            headers={'Accept': 'application/vnd.art19.v0+json'})
+        rss_metadata = self._download_json(
+            'https://rss.art19.com/episodes/{0}.json'.format(episode_id), episode_id,
+            fatal=False, note='Downloading RSS metadata')
+
+        # The direct RSS MP3 URL is always offered, whatever the metadata says
+        formats = [{
+            'format_id': 'direct',
+            'url': 'https://rss.art19.com/episodes/{0}.mp3'.format(episode_id),
+            'vcodec': 'none',
+            'acodec': 'mp3',
+        }]
+        # Walk the (key, value) items of content.media, keeping entries that
+        # have a 'url' and are not the 'waveform_bin' entry
+        for fmt_id, fmt_data in traverse_obj(rss_metadata, (
+                'content', 'media', T(dict.items),
+                lambda _, k_v: k_v[0] != 'waveform_bin' and k_v[1].get('url'))):
+            fmt_url = url_or_none(fmt_data['url'])
+            if not fmt_url:
+                continue
+            formats.append({
+                'format_id': fmt_id,
+                'url': fmt_url,
+                'vcodec': 'none',
+                # the media key doubles as the codec name
+                'acodec': fmt_id,
+                # 'ogg' gets a lower quality value than the other media entries
+                'quality': -2 if fmt_id == 'ogg' else -1,
+            })
+
+        self._sort_formats(formats)
+
+        # Three dicts are merged: id/formats, fields from the player metadata
+        # and fields from the RSS metadata.
+        # NOTE(review): rev=True presumably reverses the merge precedence so
+        # that later dicts win -- confirm against utils.merge_dicts
+        return merge_dicts({
+            'id': episode_id,
+            'formats': formats,
+        }, traverse_obj(player_metadata, ('episode', {
+            'title': ('title', T(str_or_none)),
+            'description': ('description_plain', T(str_or_none)),
+            'episode_id': ('id', T(str_or_none)),
+            'episode_number': ('episode_number', T(int_or_none)),
+            'season_id': ('season_id', T(str_or_none)),
+            'series_id': ('series_id', T(str_or_none)),
+            'timestamp': ('created_at', T(parse_iso8601)),
+            'release_timestamp': ('released_at', T(parse_iso8601)),
+            'modified_timestamp': ('updated_at', T(parse_iso8601)),
+        })), traverse_obj(rss_metadata, ('content', {
+            'title': ('episode_title', T(str_or_none)),
+            'description': ('episode_description_plain', T(str_or_none)),
+            'episode_id': ('episode_id', T(str_or_none)),
+            'episode_number': ('episode_number', T(int_or_none)),
+            'season': ('season_title', T(str_or_none)),
+            'season_id': ('season_id', T(str_or_none)),
+            'season_number': ('season_number', T(int_or_none)),
+            'series': ('series_title', T(str_or_none)),
+            'series_id': ('series_id', T(str_or_none)),
+            'thumbnail': ('cover_image', T(url_or_none)),
+            'duration': ('duration', T(float_or_none)),
+        })), rev=True)
+
+
+class Art19ShowIE(InfoExtractor):
+    IE_DESC = 'Art19 series'
+    _VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
+    _VALID_URL = (
+        r'{0}(?:$|[#?])'.format(_VALID_URL_BASE),
+        r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
+    )
+    _EMBED_REGEX = (r'<iframe[^>]+\bsrc=[\'"](?P<url>{0}[^\'"])'.format(_VALID_URL_BASE),)
+
+    _TESTS = [{
+        'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
+            'display_id': 'echt-gebeurd',
+            'title': 'Echt Gebeurd',
+            'description': r're:(?us)Bij\sEcht Gebeurd\svertellen mensen .{1166} Eline Veldhuisen\.$',
+            'timestamp': 1492642167,
+            # 'upload_date': '20170419',
+            'modified_timestamp': int,
+            # 'modified_date': str,
+            'tags': 'count:7',
+        },
+        'playlist_mincount': 425,
+    }, {
+        'url': 'https://rss.art19.com/scamfluencers',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
+            'display_id': 'scamfluencers',
+            'title': 'Scamfluencers',
+            'description': r're:(?s)You never really know someone\b.{1078} wondery\.com/links/scamfluencers/ now\.$',
+            'timestamp': 1647368573,
+            # 'upload_date': '20220315',
+            'modified_timestamp': int,
+            # 'modified_date': str,
+            'tags': [],
+        },
+        'playlist_mincount': 90,
+    }, {
+        'url': 'https://art19.com/shows/enthuellt/embed',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
+            'display_id': 'enthuellt',
+            'title': 'Enthüllt',
+            'description': 'md5:17752246643414a2fd51744fc9a1c08e',
+            'timestamp': 1601645860,
+            # 'upload_date': '20201002',
+            'modified_timestamp': int,
+            # 'modified_date': str,
+            'tags': 'count:10',
+        },
+        'playlist_mincount': 10,
+        'skip': 'Content not found',
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
+            'display_id': 'deconstructing-yourself',
+            'title': 'Deconstructing Yourself',
+            'description': 'md5:dab5082b28b248a35476abf64768854d',
+            'timestamp': 1570581181,
+            # 'upload_date': '20191009',
+            'modified_timestamp': int,
+            # 'modified_date': str,
+            'tags': 'count:5',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
+        'info_dict': {
+            '_type': 'playlist',
+            'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
+            'display_id': 'the-ben-joravsky-show',
+            'title': 'The Ben Joravsky Show',
+            'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
+            'timestamp': 1550875095,
+            # 'upload_date': '20190222',
+            'modified_timestamp': int,
+            # 'modified_date': str,
+            'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
+        },
+        'playlist_mincount': 1900,
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        for from_ in super(Art19ShowIE, cls)._extract_embed_urls(url, webpage):
+            yield from_
+        for series_id in re.findall(
+                r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
+            yield 'https://art19.com/shows/{0}'.format(series_id)
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        for expected in ((403, 404), None):
+            series_metadata, urlh = self._download_json_handle(
+                'https://art19.com/series/{0}'.format(series_id), series_id, note='Downloading series metadata',
+                headers={'Accept': 'application/vnd.art19.v0+json'},
+                expected_status=(403, 404))
+            if urlh.getcode() == 403:
+                # raise the actual problem with the page
+                urlh = self._request_webpage(url, series_id, expected_status=404)
+                if urlh.getcode() == 404:
+                    raise ExtractorError(
+                        'content not found, possibly expired',
+                        video_id=series_id, expected=True)
+            if urlh.getcode() not in (expected or []):
+                # apparently OK
+                break
+
+        return merge_dicts(
+            self.playlist_result((
+                self.url_result('https://rss.art19.com/episodes/{0}.mp3'.format(episode_id), Art19IE)
+                for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', Ellipsis, T(str_or_none))))),
+            traverse_obj(series_metadata, ('series', {
+                'id': ('id', T(str_or_none)),
+                'display_id': ('slug', T(str_or_none)),
+                'title': ('title', T(str_or_none)),
+                'description': ('description_plain', T(str_or_none)),
+                'timestamp': ('created_at', T(parse_iso8601)),
+                'modified_timestamp': ('updated_at', T(parse_iso8601)),
+            })),
+            traverse_obj(series_metadata, {
+                'tags': ('tags', Ellipsis, 'name', T(str_or_none)),
+            }, {'tags': T(lambda _: [])}))
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -71,13 +71,17 @@ from .ard import (
    ARDIE,
    ARDMediathekIE,
 )
+from .art19 import (
+    Art19IE,
+    Art19ShowIE,
+)
+from .arnes import ArnesIE
 from .arte import (
    ArteTVIE,
    ArteTVEmbedIE,
    ArteTVPlaylistIE,
    ArteTVCategoryIE,
 )
-from .arnes import ArnesIE
 from .asiancrush import (
    AsianCrushIE,
    AsianCrushPlaylistIE,
@ -776,7 +780,12 @@ from .ndr import (
    NJoyEmbedIE,
 )
 from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
+from .nebula import (
+    NebulaIE,
+    NebulaChannelIE,
+    NebulaClassIE,
+    NebulaSubscriptionsIE,
+)
 from .nerdcubed import NerdCubedFeedIE
 from .neteasemusic import (
    NetEaseMusicIE,
@ -787,6 +796,7 @@ from .neteasemusic import (
    NetEaseMusicProgramIE,
    NetEaseMusicDjRadioIE,
 )
+from .netzkino import NetzkinoIE
 from .newgrounds import (
    NewgroundsIE,
    NewgroundsPlaylistIE,
--- a/youtube_dl/extractor/nebula.py
+++ b/youtube_dl/extractor/nebula.py
@ -0,0 +1,574 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .art19 import Art19IE
+from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError as HTTPError,
+    compat_kwargs,
+    compat_str as str,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    json_stringify,
+    # make_archive_id,
+    merge_dicts,
+    parse_iso8601,
+    smuggle_url,
+    str_or_none,
+    T,
+    traverse_obj,
+    try_call,
+    unsmuggle_url,
+    update_url,
+    url_basename,
+    url_or_none,
+    urljoin,
+)
+
+_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
+
+
+class NebulaBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'watchnebula'
+    _token = _api_token = None
+
+    def _real_initialize(self):
+        """Authorize with Nebula by delegating to `_login()`."""
+        self._login()
+
+    def _login(self):
+        """Obtain the token used to authorize Nebula API requests.
+
+        Uses the already known API token, or else the value of the
+        `nebula_auth.apiToken` cookie for https://nebula.tv, to request an
+        authorization token. Only when that request returns no token are
+        the configured credentials used for a username/password login.
+        """
+        if not self._api_token:
+            # presumably try_call() yields None when the cookie lookup
+            # fails -- confirm against utils.try_call
+            self._api_token = try_call(
+                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
+        # The Authorization header is sent only when an API token is known;
+        # a response without a 'token' key raises KeyError here
+        self._token = self._download_json(
+            'https://users.api.nebula.app/api/v1/authorization/', None,
+            headers={'Authorization': 'Token {0}'.format(self._api_token)} if self._api_token else {},
+            note='Authorizing to Nebula', data=b'')['token']
+        if self._token:
+            return
+
+        username, password = self._get_login_info()
+        if username is None:
+            return
+        # NOTE(review): _perform_login() only stores self._api_token; the
+        # authorization token (self._token) is not requested again here --
+        # confirm that a later re-initialization is meant to pick it up
+        self._perform_login(username, password)
+
+    def _perform_login(self, username, password):
+        try:
+            response = self._download_json(
+                'https://nebula.tv/auth/login/', None,
+                'Logging in to Nebula', 'Login failed',
+                data=json_stringify({'email': username, 'password': password}),
+                headers={'content-type': 'application/json'})
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise ExtractorError('Login failed: Invalid username or password', expected=True)
+            raise
+        self._api_token = traverse_obj(response, ('key', T(str)))
+        if not self._api_token:
+            raise ExtractorError('Login failed: No token')
+
+    def _call_api(self, *args, **kwargs):
+        """Call `_download_json()` with the current bearer token.
+
+        All arguments are passed through to `_download_json()`. When a
+        token is held, an `Authorization: Bearer ...` header is added. If
+        the request fails with HTTP 401 or 403, the extractor is
+        re-initialized and the request is retried exactly once; any error
+        from the retry propagates to the caller.
+        """
+
+        def kwargs_set_token(kw):
+            # Adds/overwrites the Authorization header in kw['headers'],
+            # creating the headers dict if needed.
+            # NOTE(review): compat_kwargs() presumably normalizes keyword
+            # names for older Pythons -- confirm in compat
+            kw.setdefault('headers', {})['Authorization'] = 'Bearer {0}'.format(self._token)
+            return compat_kwargs(kw)
+
+        if self._token:
+            kwargs = kwargs_set_token(kwargs)
+        try:
+            return self._download_json(*args, **kwargs)
+        except ExtractorError as e:
+            # Only authorization failures are retried; everything else is re-raised
+            if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
+                raise
+            self.to_screen(
+                'Reauthorizing with Nebula and retrying, because last API '
+                'call resulted in error {0}'.format(e.cause.status))
+            self._real_initialize()
+            # Refresh the header, since re-initializing may have changed the token
+            if self._token:
+                kwargs = kwargs_set_token(kwargs)
+            return self._download_json(*args, **kwargs)
+
+    def _extract_formats(self, content_id, slug):
+        """Fetch the HLS manifest for `content_id` and return its formats.
+
+        Returns a dict with 'formats' (sorted) and 'subtitles'; the latter
+        is always an empty dict here. On HTTP 401 `raise_login_required()`
+        is called; on a first HTTP 403 the extractor is re-initialized and
+        the manifest is requested once more. Other errors propagate.
+        """
+        for retry in (False, True):
+            try:
+                # fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                # The manifest path segment is the part of content_id before
+                # its first ':' with an 's' appended; subs is the literal {}
+                # that closes the tuple on the right-hand side
+                fmts, subs = self._extract_m3u8_formats(
+                    'https://content.api.nebula.app/{0}s/{1}/manifest.m3u8'.format(
+                        content_id.split(':', 1)[0], content_id),
+                    slug, 'mp4', query={
+                        'token': self._token,
+                        'app_version': '23.10.0',
+                        'platform': 'ios',
+                    }), {}
+                self._sort_formats(fmts)
+                return {'formats': fmts, 'subtitles': subs}
+            except ExtractorError as e:
+                # Errors not caused by an HTTP response are not handled here
+                if not isinstance(e.cause, HTTPError):
+                    raise
+                if e.cause.status == 401:
+                    self.raise_login_required()
+                # A 403 is retried only on the first pass
+                if not retry and e.cause.status == 403:
+                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
+                    self._real_initialize()
+                    continue
+                raise
+
+    def _extract_video_metadata(self, episode):
+        channel_url = traverse_obj(
+            episode, (('channel_slug', 'class_slug'), T(lambda u: urljoin('https://nebula.tv/', u))), get_all=False)
+        return merge_dicts({
+            'id': episode['id'].partition(':')[2],
+            'title': episode['title'],
+            'channel_url': channel_url,
+            'uploader_url': channel_url,
+        }, traverse_obj(episode, {
+            'display_id': 'slug',
+            'description': 'description',
+            'timestamp': ('published_at', T(parse_iso8601)),
+            'duration': ('duration', T(int_or_none)),
+            'channel_id': 'channel_slug',
+            'uploader_id': 'channel_slug',
+            'channel': 'channel_title',
+            'uploader': 'channel_title',
+            'series': 'channel_title',
+            'creator': 'channel_title',
+            'thumbnail': ('images', 'thumbnail', 'src', T(url_or_none)),
+            'episode_number': ('order', T(int_or_none)),
+
+            # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
+            # '_old_archive_ids': ('zype_id', {lambda x: [
+            #    make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
+        }))
+
+
+class NebulaIE(NebulaBaseIE):
+    IE_NAME = 'nebula:video'
+    _VALID_URL = r'{0}/videos/(?P<id>[\w-]+)'.format(_BASE_URL_RE)
+    _TESTS = [{
+        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
+        'info_dict': {
+            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
+            'ext': 'mp4',
+            'title': 'That Time Disney Remade Beauty and the Beast',
+            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
+            'upload_date': '20180731',
+            'timestamp': 1533009600,
+            'channel': 'Lindsay Ellis',
+            'channel_id': 'lindsayellis',
+            'uploader': 'Lindsay Ellis',
+            'uploader_id': 'lindsayellis',
+            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
+            'series': 'Lindsay Ellis',
+            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
+            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
+            'creator': 'Lindsay Ellis',
+            'duration': 2212,
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            # '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+        'md5': 'd05739cf6c38c09322422f696b569c23',
+        'info_dict': {
+            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
+            'ext': 'mp4',
+            'title': 'Landing Craft - How The Allies Got Ashore',
+            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
+            'upload_date': '20200327',
+            'timestamp': 1585348140,
+            'channel': 'Real Engineering — The Logistics of D-Day',
+            'channel_id': 'd-day',
+            'uploader': 'Real Engineering — The Logistics of D-Day',
+            'uploader_id': 'd-day',
+            'series': 'Real Engineering — The Logistics of D-Day',
+            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
+            'creator': 'Real Engineering — The Logistics of D-Day',
+            'duration': 841,
+            'channel_url': 'https://nebula.tv/d-day',
+            'uploader_url': 'https://nebula.tv/d-day',
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            # '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+        'skip': 'Only available for registered users',
+    }, {
+        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
+        'md5': 'ebe28a7ad822b9ee172387d860487868',
+        'info_dict': {
+            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
+            'ext': 'mp4',
+            'title': 'Episode 1: The Draw',
+            'description': r'contains:There’s free money on offer… if the players can all work together.',
+            'upload_date': '20200323',
+            'timestamp': 1584980400,
+            'channel': 'Tom Scott Presents: Money',
+            'channel_id': 'tom-scott-presents-money',
+            'uploader': 'Tom Scott Presents: Money',
+            'uploader_id': 'tom-scott-presents-money',
+            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
+            'duration': 825,
+            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
+            'series': 'Tom Scott Presents: Money',
+            'display_id': 'money-episode-1-the-draw',
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            # '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+        'skip': 'Only available for registered users',
+    }, {
+        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
+        'only_matching': True,
+    }, {
+        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
+        'info_dict': {
+            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
+            'ext': 'mp4',
+            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
+            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
+            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
+            'upload_date': '20230223',
+            'timestamp': 1677144070,
+            'channel': 'TLDR News EU',
+            'channel_id': 'tldrnewseu',
+            'uploader': 'TLDR News EU',
+            'uploader_id': 'tldrnewseu',
+            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
+            'duration': 524,
+            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
+            'series': 'TLDR News EU',
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            'creator': 'TLDR News EU',
+            # '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        slug = self._match_id(url)
+        url, smuggled_data = unsmuggle_url(url, {})
+        if smuggled_data.get('id'):
+            return merge_dicts({
+                'id': smuggled_data['id'],
+                'display_id': slug,
+                'title': '',
+            }, self._extract_formats(smuggled_data['id'], slug))
+
+        metadata = self._call_api(
+            'https://content.api.nebula.app/content/videos/{0}'.format(slug),
+            slug, note='Fetching video metadata')
+        return merge_dicts(
+            self._extract_video_metadata(metadata),
+            self._extract_formats(metadata['id'], slug),
+            rev=True
+        )
+
+
+class NebulaClassIE(NebulaBaseIE):
+    IE_NAME = 'nebula:media'
+    _VALID_URL = r'{0}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
+    _TESTS = [{
+        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
+        'info_dict': {
+            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
+            'ext': 'mp4',
+            'display_id': '14',
+            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
+            'episode_number': 14,
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
+            'duration': 646,
+            'episode': 'Episode 14',
+            'title': 'Photos, Sculpture, and Video',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+        'skip': 'Only available for registered users',
+    }, {
+        'add_ies': [Art19IE],
+        'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
+            'description': r"re:(?s)20 years ago, what was previously the Soviet Union's .{467}#do-not-sell-my-info\.$",
+            'series_id': 'e0223cfc-f39c-4ad4-8724-bd8731bd31b5',
+            'modified_timestamp': 1629410982,
+            'episode_id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
+            'series': 'Extremities',
+            # 'modified_date': '20200903',
+            'upload_date': '20200902',
+            'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
+            'release_timestamp': 1571237958,
+            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
+            'duration': 1546.05714,
+            'timestamp': 1599085555,
+            'release_date': '20191016',
+        },
+    }, {
+        'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
+        'info_dict': {
+            'ext': 'mp3',
+            'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+            'episode_number': 1,
+            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
+            'release_date': '20230304',
+            'modified_date': '20230403',
+            'series': 'The Layover',
+            'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
+            'modified_timestamp': 1680554566,
+            'duration': 3130.46401,
+            'release_timestamp': 1677943800,
+            'title': 'The Layover — Episode 1',
+            'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
+            'upload_date': '20230303',
+            'episode': 'Episode 1',
+            'timestamp': 1677883672,
+            'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+        'skip': 'Only available for registered users',
+    }]
+
+    def _real_extract(self, url):
+        slug, episode = self._match_valid_url(url).group('id', 'ep')
+        url, smuggled_data = unsmuggle_url(url, {})
+        if smuggled_data.get('id'):
+            return merge_dicts({
+                'id': smuggled_data['id'],
+                'display_id': slug,
+                'title': '',
+            }, self._extract_formats(smuggled_data['id'], slug))
+
+        metadata = self._call_api(
+            'https://content.api.nebula.app/content/{0}/{1}/?include=lessons'.format(
+                slug, episode),
+            slug, note='Fetching class/podcast metadata')
+        content_type = traverse_obj(metadata, 'type')
+        if content_type == 'lesson':
+            return merge_dicts(
+                self._extract_video_metadata(metadata),
+                self._extract_formats(metadata['id'], slug))
+        elif content_type == 'podcast_episode':
+            episode_url = metadata.get('episode_url')
+            if not episode_url and metadata.get('premium'):
+                self.raise_login_required()
+
+            if Art19IE.suitable(episode_url):
+                return self.url_result(episode_url, Art19IE.ie_key())
+            return merge_dicts({
+                'id': metadata['id'],
+                'title': metadata['title'],
+            }, traverse_obj(metadata, {
+                'url': ('episode_url', T(url_or_none)),
+                'description': ('description', T(str_or_none)),
+                'timestamp': ('published_at', T(parse_iso8601)),
+                'duration': ('duration', T(int_or_none)),
+                'channel_id': ('channel_id', T(str_or_none)),
+                'channel': ('channel_title', T(str_or_none)),
+                'thumbnail': ('assets', 'regular', T(url_or_none)),
+            }))
+
+        raise ExtractorError('Unexpected content type {0!r}'.format(content_type))
+
+
+class NebulaPlaylistBaseIE(NebulaBaseIE):
+    _BASE_API_URL = 'https://content.api.nebula.app/'
+    _API_QUERY = {'ordering': '-published_at'}
+
+    @classmethod
+    def _get_api_url(cls, item_id, path='/video_episodes/'):
+        return update_url(cls._BASE_API_URL, path=path, query_update=cls._API_QUERY)
+
+    @staticmethod
+    def _get_episode_url(episode, episode_id):
+        return 'https://nebula.tv/videos/{0}'.format(episode_id)
+
+    @classmethod
+    def url_result(cls, url, *args, **kwargs):
+        url_transparent = kwargs.pop('url_transparent', False)
+        smuggled_data = kwargs.pop('smuggled_data', None)
+        if smuggled_data:
+            url = smuggle_url(url, smuggled_data)
+        ie_key = args[0] if len(args) > 0 else kwargs.get('ie_key')
+        if not ie_key:
+            args = (NebulaIE.ie_key(),) + args
+        return merge_dicts(
+            {'_type': 'url_transparent'} if url_transparent else {},
+            super(NebulaPlaylistBaseIE, cls).url_result(url, *args),
+            **kwargs)
+
+    def _generate_playlist_entries(self, pl_id=None, slug=None, dl_note=None):
+        next_url = self._get_api_url(pl_id)
+        if dl_note is None:
+            dl_note = self.IE_NAME.rpartition(':')[::2]
+            if dl_note[0] and dl_note[1]:
+                dl_note = '{0} '.format(dl_note[1])
+            else:
+                dl_note = ''
+        slug = slug or pl_id
+        for page_num in itertools.count(1):
+            episodes = self._call_api(
+                next_url, slug, note='Retrieving {0}page {1}'.format(
+                    dl_note, page_num))
+            for episode in traverse_obj(episodes, ('results', Ellipsis)):
+                metadata = self._extract_video_metadata(episode)
+                yield self.url_result(
+                    self._get_episode_url(episode, metadata['display_id']),
+                    smuggled_data={'id': episode['id']}, url_transparent=True,
+                    **metadata)
+            next_url = episodes.get('next')
+            if not next_url:
+                break
+
+
+class NebulaSubscriptionsIE(NebulaPlaylistBaseIE):
+    IE_NAME = 'nebula:subscriptions'
+    _VALID_URL = r'{0}/myshows'.format(_BASE_URL_RE)
+    _API_QUERY = {
+        'following': 'true',
+        'include': 'engagement',
+        'ordering': '-published_at',
+    }
+    _TESTS = [{
+        'url': 'https://nebula.tv/myshows',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': 'myshows',
+        },
+        'skip': 'You must be logged in to find your subscriptions',
+    }]
+
+    def _call_api(self, *args, **kwargs):
+
+        try:
+            return super(NebulaSubscriptionsIE, self)._call_api(*args, **kwargs)
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                self.raise_login_required('You must be logged in to find your subscriptions')
+            raise
+
+    def _real_extract(self, url):
+        slug = url_basename(url)
+        return self.playlist_result(self._generate_playlist_entries(slug), slug)
+
+
+class NebulaChannelIE(NebulaPlaylistBaseIE):
+    IE_NAME = 'nebula:channel'
+    _VALID_URL = r'{0}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
+    _TESTS = [{
+        'url': 'https://nebula.tv/tom-scott-presents-money',
+        'info_dict': {
+            'id': 'tom-scott-presents-money',
+            'title': 'Tom Scott Presents: Money',
+            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
+        },
+        'playlist_count': 5,
+    }, {
+        'url': 'https://nebula.tv/lindsayellis',
+        'info_dict': {
+            'id': 'lindsayellis',
+            'title': 'Lindsay Ellis',
+            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
+        },
+        'playlist_mincount': 2,
+    }, {
+        'url': 'https://nebula.tv/johnnyharris',
+        'info_dict': {
+            'id': 'johnnyharris',
+            'title': 'Johnny Harris',
+            'description': 'I make videos about maps and many other things.',
+        },
+        'playlist_mincount': 90,
+    }, {
+        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
+        'info_dict': {
+            'id': 'copyright-for-fun-and-profit',
+            'title': 'Copyright for Fun and Profit',
+            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
+        },
+        'playlist_count': 23,
+    }, {
+        'url': 'https://nebula.tv/trussissuespodcast',
+        'info_dict': {
+            'id': 'trussissuespodcast',
+            'title': 'Bite the Ballot',
+            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
+        },
+        'playlist_mincount': 80,
+    }]
+
+    @classmethod
+    def _get_api_url(cls, item_id, path='/video_channels/{0}/video_episodes/'):
+        return super(NebulaChannelIE, cls)._get_api_url(
+            item_id, path=path.format(item_id))
+
+    @classmethod
+    def _get_episode_url(cls, episode, episode_id):
+        return (
+            episode.get('share_url')
+            or super(NebulaChannelIE, cls)._get_episode_url(episode, episode_id))
+
+    def _generate_class_entries(self, channel):
+        for lesson in traverse_obj(channel, ('lessons', Ellipsis)):
+            metadata = self._extract_video_metadata(lesson)
+            yield self.url_result(
+                lesson.get('share_url') or 'https://nebula.tv/{0}/{1}'.format(
+                    metadata['class_slug'], metadata['slug']),
+                smuggled_data={'id': lesson['id']}, url_transparent=True,
+                **metadata)
+
+    def _generate_podcast_entries(self, collection_id, collection_slug):
+        next_url = 'https://content.api.nebula.app/podcast_channels/{0}/podcast_episodes/?ordering=-published_at&premium=true'.format(
+            collection_id)
+        for page_num in itertools.count(1):
+            episodes = self._call_api(next_url, collection_slug, note='Retrieving podcast page {0}'.format(page_num))
+
+            for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
+                yield self.url_result(episode['share_url'], NebulaClassIE)
+            next_url = episodes.get('next')
+            if not next_url:
+                break
+
+    def _real_extract(self, url):
+        collection_slug = self._match_id(url)
+        channel = self._call_api(
+            'https://content.api.nebula.app/content/{0}/?include=lessons'.format(
+                collection_slug),
+            collection_slug, note='Retrieving channel')
+
+        channel_type = traverse_obj(channel, 'type')
+        if channel_type == 'class':
+            entries = self._generate_class_entries(channel)
+        elif channel_type == 'podcast_channel':
+            entries = self._generate_podcast_entries(channel['id'], collection_slug)
+        else:
+            entries = self._generate_playlist_entries(channel['id'], collection_slug)
+
+        return self.playlist_result(
+            entries,
+            playlist_id=collection_slug,
+            playlist_title=channel.get('title'),
+            playlist_description=channel.get('description'))
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -692,9 +692,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    _PLAYER_INFO_RE = (
-        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
-        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
-        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
+        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
+        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias(?:_tce)?\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
+        r'\b(?P<id>vfl[a-zA-Z0-9_-]{6,})\b.*?\.js$',
    )
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

@ -1626,15 +1626,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        """ Return a string representation of a signature """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

-    @classmethod
-    def _extract_player_info(cls, player_url):
-        for player_re in cls._PLAYER_INFO_RE:
-            id_m = re.search(player_re, player_url)
-            if id_m:
-                break
-        else:
-            raise ExtractorError('Cannot identify player %r' % player_url)
-        return id_m.group('id')
+    def _extract_player_info(self, player_url):
+        try:
+            return self._search_regex(
+                self._PLAYER_INFO_RE, player_url, 'player info', group='id')
+        except ExtractorError as e:
+            raise ExtractorError(
+                'Cannot identify player %r' % (player_url,), cause=e)

    def _load_player(self, video_id, player_url, fatal=True, player_id=None):
        if not player_id:
@ -1711,6 +1709,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)

+    def _extract_sig_fn(self, jsi, funcname):
+        var_ay = self._search_regex(
+            r'''(?x)
+                (?:\*/|\{|\n|^)\s*(?:'[^']+'\s*;\s*)
+                    (var\s*[\w$]+\s*=\s*(?:
+                        ('|")(?:\\\2|(?!\2).)+\2\s*\.\s*split\(\s*('|")\W+\3\s*\)|
+                        \[\s*(?:('|")(?:\\\4|(?!\4).)*\4\s*(?:(?=\])|,\s*))+\]
+                    ))(?=\s*[,;])
+            ''', jsi.code, 'useful values', default='')
+
+        sig_fn = jsi.extract_function_code(funcname)
+
+        if var_ay:
+            sig_fn = (sig_fn[0], ';\n'.join((var_ay, sig_fn[1])))
+
+        return sig_fn
+
    def _parse_sig_js(self, jscode):
        # Examples where `sig` is funcname:
        # sig=function(a){a=a.split(""); ... ;return a.join("")};
@ -1736,8 +1751,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
-        initial_function = jsi.extract_function(funcname)
-        return lambda s: initial_function([s])
+
+        initial_function = self._extract_sig_fn(jsi, funcname)
+
+        func = jsi.extract_function_from_code(*initial_function)
+
+        return lambda s: func([s])

    def _cached(self, func, *cache_id):
        def inner(*args, **kwargs):
@ -1856,15 +1875,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):

-        var_ay = self._search_regex(
-            r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
-            jsi.code, 'useful values', default='')
-
        func_name = self._extract_n_function_name(jsi.code)

-        func_code = jsi.extract_function_code(func_name)
-        if var_ay:
-            func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
+        func_code = self._extract_sig_fn(jsi, func_name)

        if player_id:
            self.cache.store('youtube-nsig', player_id, func_code)
@ -2136,7 +2149,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    video_details = merge_dicts(*traverse_obj(
                        (player_response, api_player_response),
                        (Ellipsis, 'videoDetails', T(dict))))
-                    player_response.update(api_player_response or {})
+                    player_response.update(filter_dict(
+                        api_player_response or {}, cndn=lambda k, _: k != 'captions'))
                    player_response['videoDetails'] = video_details

        def is_agegated(playability):
@ -2566,8 +2580,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        }

        pctr = traverse_obj(
-            player_response,
-            ('captions', 'playerCaptionsTracklistRenderer', T(dict)))
+            (player_response, api_player_response),
+            (Ellipsis, 'captions', 'playerCaptionsTracklistRenderer', T(dict)))
        if pctr:
            def process_language(container, base_url, lang_code, query):
                lang_subs = []
@ -2584,20 +2598,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            def process_subtitles():
                subtitles = {}
                for caption_track in traverse_obj(pctr, (
-                        'captionTracks', lambda _, v: v.get('baseUrl'))):
+                        Ellipsis, 'captionTracks', lambda _, v: (
+                            v.get('baseUrl') and v.get('languageCode')))):
                    base_url = self._yt_urljoin(caption_track['baseUrl'])
                    if not base_url:
                        continue
+                    lang_code = caption_track['languageCode']
                    if caption_track.get('kind') != 'asr':
-                        lang_code = caption_track.get('languageCode')
-                        if not lang_code:
-                            continue
                        process_language(
                            subtitles, base_url, lang_code, {})
                        continue
                    automatic_captions = {}
+                    process_language(
+                        automatic_captions, base_url, lang_code, {})
                    for translation_language in traverse_obj(pctr, (
-                            'translationLanguages', lambda _, v: v.get('languageCode'))):
+                            Ellipsis, 'translationLanguages', lambda _, v: v.get('languageCode'))):
                        translation_language_code = translation_language['languageCode']
                        process_language(
                            automatic_captions, base_url, translation_language_code,
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -678,7 +678,7 @@ class JSInterpreter(object):
            return len(obj)
        try:
            return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
-        except (TypeError, KeyError, IndexError) as e:
+        except (TypeError, KeyError, IndexError, ValueError) as e:
            # allow_undefined is None gives correct behaviour
            if allow_undefined or (
                    allow_undefined is None and not isinstance(e, TypeError)):
@ -1038,6 +1038,10 @@ class JSInterpreter(object):
                    left_val = self._index(left_val, idx)
            if isinstance(idx, float):
                idx = int(idx)
+            if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
+                # JS Array is a sparsely assignable list
+                # TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
+                left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
            left_val[idx] = self._operator(
                m.group('op'), self._index(left_val, idx) if m.group('op') else None,
                m.group('expr'), expr, local_vars, allow_recursion)
@ -1204,9 +1208,10 @@ class JSInterpreter(object):
                elif member == 'join':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(len(argvals) <= 1, 'takes at most one argument')
-                    return (',' if len(argvals) == 0 else argvals[0]).join(
-                        ('' if x in (None, JS_Undefined) else _js_toString(x))
-                        for x in obj)
+                    return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
+                            else argvals[0]).join(
+                                ('' if x in (None, JS_Undefined) else _js_toString(x))
+                                for x in obj)
                elif member == 'reverse':
                    assertion(not argvals, 'does not take any arguments')
                    obj.reverse()
@ -1364,19 +1369,21 @@ class JSInterpreter(object):
        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
        return self.build_arglist(func_m.group('args')), code

-    def extract_function(self, funcname):
+    def extract_function(self, funcname, *global_stack):
        return function_with_repr(
-            self.extract_function_from_code(*self.extract_function_code(funcname)),
+            self.extract_function_from_code(*itertools.chain(
+                self.extract_function_code(funcname), global_stack)),
            'F<%s>' % (funcname,))

    def extract_function_from_code(self, argnames, code, *global_stack):
        local_vars = {}

+        start = None
        while True:
-            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
            if mobj is None:
                break
-            start, body_start = mobj.span()
+            start, body_start = ((start or 0) + x for x in mobj.span())
            body, remaining = self._separate_at_paren(code[body_start - 1:])
            name = self._named_object(local_vars, self.extract_function_from_code(
                [x.strip() for x in mobj.group('args').split(',')],
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -6715,3 +6715,46 @@ class _UnsafeExtensionError(Exception):
                raise cls(extension)

        return extension
+
+
+def json_stringify(json_expr, **kwargs):
+    # /, *, concise=True, result_encoding='utf-8', **{**encode_result_kwargs, **dumps_kwargs}
+    """
+    Convert json_expr to a string, suitable for passing over a network
+
+    @param  json_expr    Python representation of a JSON expression
+
+    KW-only parameters
+    @param  {bool}  concise      do not space around , and : (default True)
+    @param  {str}   result_encoding      encoding, if any, of the result
+                                         (default 'utf-8')
+    @param  {str}   errors      error handling for result_encoding
+    @param  ...                 other KW arguments passed to json.dumps()
+    @returns  {bytes|str}       stringified JSON, encoded to bytes using
+                                result_encoding, or Unicode if none
+
+    With the default arguments, the return value is a byte string
+    suitable to be passed as POST data.
+
+    Inspired by JSON.stringify [1], but not so much as to emulate its optional
+    replacer (use cls=replacer_JSON_encoder) or space (use indent=space for space > 0).
+    1. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
+    """
+
+    # extract all non-dumps_kwargs
+    concise = kwargs.pop('concise', True)
+    result_encoding = kwargs.pop('result_encoding', 'utf-8')
+    result_errors = kwargs.pop('errors', None)
+
+    if concise:
+        kwargs['separators'] = (',', ':')
+        kwargs = compat_kwargs(kwargs)
+    result = json.dumps(json_expr, **kwargs)
+
+    if result_encoding:
+        kwargs = compat_kwargs({'errors': result_errors}) if result_errors else {}
+        result = result.encode(result_encoding, **kwargs)
+        return result
+
+    # return a Unicode value of type type('')
+    return '' + result
Author	SHA1	Message	Date
Henrik Heimbuerger	f6354b9bfb	Merge d4664a53467eade879971de3950424299f68bd3a into 2b4fbfce25902d557b86b003cf48f738129efce4	2025-03-26 08:12:20 +00:00
dirkf	2b4fbfce25	[YouTube] Support player `4fcd6e4a` thx seproDev, bashonly: yt-dlp/yt-dlp#12748	2025-03-26 02:27:25 +00:00
dirkf	1bc45b8b6c	[JSInterp] Use `,` for join() with null/undefined argument Eg: [1,2,3].join(null) -> '1,2,3'	2025-03-25 22:35:06 +00:00
dirkf	b982d77d0b	[YouTube] Align signature tests with yt-dlp thx bashonly, yt-dlp/yt-dlp#12725	2025-03-25 22:35:06 +00:00
dirkf	c55dbf4838	[YouTube] Update signature extraction for players `643afba4`, `363db69b`	2025-03-25 22:35:06 +00:00
dirkf	087d865230	[YouTube] Support new player URL patterns	2025-03-25 22:35:06 +00:00
dirkf	a4fc1151f1	[JSInterp] Improve indexing * catch invalid list index with `ValueError` (eg [1, 2]['ab'] -> undefined) * allow assignment outside existing list (eg var l = [1,2]; l[9] = 0;)	2025-03-25 22:35:05 +00:00
dirkf	a464c159e6	[YouTube] Make `_extract_player_info()` use `_search_regex()`	2025-03-25 22:35:05 +00:00
dirkf	7dca08eff0	[YouTube] Also get original of translated automatic captions	2025-03-25 22:35:05 +00:00
dirkf	2239ee7965	[YouTube] Get subtitles/automatic captions from both web and API responses	2025-03-25 22:35:05 +00:00
dirkf	d4664a5346	Remove (last?) set literal	2024-11-23 11:14:30 +00:00
dirkf	92d881c33f	Linty	2024-11-23 11:03:37 +00:00
dirkf	bd4729a866	[utils] Add json_stringify() * somewhat like JSON.stringify() * replaces json.dumps(..., separators=(',',':')).encode('utf-8') * more kwarg options available	2024-11-23 11:00:00 +00:00
dirkf	79abdae734	Add Art19IE to extractors.py And clean up sorting	2024-11-23 10:47:21 +00:00
dirkf	88619125c8	Create art19.py	2024-11-23 10:39:54 +00:00
dirkf	3565d21951	Merge branch 'master' into add-nebula-support	2024-11-23 10:34:26 +00:00
dirkf	ddbadd037f	Update PR with back-port from its development in yt-dlp	2024-11-23 10:31:42 +00:00
Henrik Heimbuerger	a0f69f9526	[nebula] Fix stale session issues When Nebula isn't accessed for a while, the Zype access token stored on the Nebula backend expires. It is then no longer returned by the user endpoint. The Nebula frontend has the same issue and keeps polling for the Zype token in this case. This isn't implemented in this extractor yet, but at least a specific error message now prints some helpful advice.	2021-01-17 22:25:51 +01:00
Henrik Heimbuerger	9fdfd6d3ba	[nebula] Prevent cookies from breaking Nebula auth When the 'sessionid' cookie is submitted to the `/auth/login/` endpoint, the response is always a 403. This typically happens when youtube_dl is run with both `--netrc` and `--cookies` as your default configuration. In that situation, the first authentication succeeds and stores the `sessionid` cookie in the cookie jar. During subsequent authentication attempts, the cookie is sent alongside and causes the authentication to fail. This is very unexpected and we therefore specifically handle this case.	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	59c0e6e3d8	[nebula] Log attempted authentication method	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	8b4c9da62a	[nebula] Clean up credentials-based authentication	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	2562c9ec74	[nebula] Implement PoC of netrc authentication	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	f8eb89748b	[nebula] Update test video checksums	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	30362440dc	[nebula] Improve performance by avoiding redirect	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	1317a43a6a	[nebula] Implement Zype API key retrieval from JS chunk	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	18582060c2	[nebula] Rewrite extractor to new frontend (refs #21258 )	2021-01-17 15:52:02 +01:00
Henrik Heimbuerger	af3434b839	[nebula] Relax meta data lookups	2021-01-17 15:52:01 +01:00
Henrik Heimbuerger	61cead3235	[nebula] Add better channel title extraction (refs #21258 )	2021-01-17 15:52:01 +01:00
Henrik Heimbuerger	469cae38cd	[nebula] Add additional test cases and improve cookie envvar handling	2021-01-17 15:52:01 +01:00
Henrik Heimbuerger	f6ac8cd495	[nebula] Add basic support for Nebula (refs #21258 )	2021-01-17 15:52:01 +01:00