2025-07-14 15:34:15 +09:00
7 changed files with 10 additions and 59 deletions
--- a/README.md
+++ b/README.md
@@ -1069,11 +1069,9 @@ After you have ensured this site is distributing its content legally, you can fo
            }
    ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
-    * the test names use the extractor class name **without the trailing `IE`**
-    * tests with `only_matching` key in test's dict are not counted.
-8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):

        $ flake8 youtube_dl/extractor/yourextractor.py

--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -557,7 +557,6 @@ from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
 from .konserthusetplay import KonserthusetPlayIE
 from .krasview import KrasViewIE
-from .kth import KTHIE
 from .ku6 import Ku6IE
 from .kusi import KUSIIE
 from .kuwo import (
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
            'duration': info.get('duration'),
            'timestamp': info.get('createdAt'),
            'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
-            'view_count': int_or_none(info.get('plays')),
+            'view_count': info.get('plays'),
        }
--- a/youtube_dl/extractor/kth.py
+++ b/youtube_dl/extractor/kth.py
@@ -1,31 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import smuggle_url
-
-
-class KTHIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
-    _TEST = {
-        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
-        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
-        'info_dict': {
-            'id': '0_uoop6oz9',
-            'ext': 'mp4',
-            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
-            'thumbnail': 're:https?://.+/thumbnail/.+',
-            'duration': 3516,
-            'timestamp': 1647345358,
-            'upload_date': '20220315',
-            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        result = self.url_result(
-            smuggle_url('kaltura:308:%s' % video_id, {
-                'service_url': 'https://api.kaltura.nordu.net'}),
-            'Kaltura')
-        return result
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from __future__ import unicode_literals

 import re
@@ -24,7 +23,7 @@ class NhkBaseIE(InfoExtractor):
    def _extract_episode_info(self, url, episode=None):
        fetch_episode = episode is None
        lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
-        if len(episode_id) == 7:
+        if episode_id.isdigit():
            episode_id = episode_id[:4] + '-' + episode_id[4:]

        is_video = m_type == 'video'
@@ -85,8 +84,7 @@ class NhkBaseIE(InfoExtractor):


 class NhkVodIE(NhkBaseIE):
-    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
-    _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+    _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
@@ -126,19 +124,6 @@ class NhkVodIE(NhkBaseIE):
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
-    }, {
-        # video, alphabetic character in ID #29670
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
-        'only_matching': True,
-        'info_dict': {
-            'id': 'qfjay6cg',
-            'ext': 'mp4',
-            'title': 'DESIGN TALKS plus - Fishermen’s Finery',
-            'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
-            'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
-            'upload_date': '20210615',
-            'timestamp': 1623722008,
-        }
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
                if not stream.get('url'):
                    continue
                yield merge_dicts({
-                    'format_id': '-'.join((format_id, ext)),
+                    'format_id': '{}-{}'.format(format_id, ext),
                    'ext': ext,
                    'source_preference': pref,
                    'url': urljoin(spl_url, stream['url']),
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
    # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
    def _extract_n_function_name(self, jscode):
-        target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
+        target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
        nfunc_and_idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
+            r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
            jscode, 'Initial JS player n function name')
        nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
        if not idx:
            return nfunc
        return self._parse_json(self._search_regex(
-            r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
+            r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
            'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]

    def _extract_n_function(self, video_id, player_url):