Compare commits

..

No commits in common. "c7965b9fc2cae54f244f31f5373cb81a40e822ab" and "b764dbe7730bc5b0a4f30f4f89fd85e096d0c4a0" have entirely different histories.

7 changed files with 10 additions and 59 deletions

View File

@ -1069,11 +1069,9 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
* the test names use the extractor class name **without the trailing `IE`**
* tests with `only_matching` key in test's dict are not counted.
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py

View File

@ -557,7 +557,6 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE
from .krasview import KrasViewIE
from .kth import KTHIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
from .kuwo import (

View File

@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
'view_count': int_or_none(info.get('plays')),
'view_count': info.get('plays'),
}

View File

@ -1,31 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class KTHIE(InfoExtractor):
_VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
_TEST = {
'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
'info_dict': {
'id': '0_uoop6oz9',
'ext': 'mp4',
'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3516,
'timestamp': 1647345358,
'upload_date': '20220315',
'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
result = self.url_result(
smuggle_url('kaltura:308:%s' % video_id, {
'service_url': 'https://api.kaltura.nordu.net'}),
'Kaltura')
return result

View File

@ -1,4 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import re
@ -24,7 +23,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
if len(episode_id) == 7:
if episode_id.isdigit():
episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video'
@ -85,8 +84,7 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@ -126,19 +124,6 @@ class NhkVodIE(NhkBaseIE):
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True,
}, {
# video, alphabetic character in ID #29670
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
'only_matching': True,
'info_dict': {
'id': 'qfjay6cg',
'ext': 'mp4',
'title': 'DESIGN TALKS plus - Fishermens Finery',
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
'upload_date': '20210615',
'timestamp': 1623722008,
}
}]
def _real_extract(self, url):

View File

@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
if not stream.get('url'):
continue
yield merge_dicts({
'format_id': '-'.join((format_id, ext)),
'format_id': '{}-{}'.format(format_id, ext),
'ext': ext,
'source_preference': pref,
'url': urljoin(spl_url, stream['url']),

View File

@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode):
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
nfunc_and_idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx:
return nfunc
return self._parse_json(self._search_regex(
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url):