Compare commits

..

No commits in common. "c7965b9fc2cae54f244f31f5373cb81a40e822ab" and "b764dbe7730bc5b0a4f30f4f89fd85e096d0c4a0" have entirely different histories.

7 changed files with 10 additions and 59 deletions

View File

@ -1069,11 +1069,9 @@ After you have ensured this site is distributing its content legally, you can fo
} }
``` ```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note: 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
* the test names use the extractor class name **without the trailing `IE`** 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
* tests with `only_matching` key in test's dict are not counted. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py $ flake8 youtube_dl/extractor/yourextractor.py

View File

@ -557,7 +557,6 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .kth import KTHIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE from .kusi import KUSIIE
from .kuwo import ( from .kuwo import (

View File

@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'), 'duration': info.get('duration'),
'timestamp': info.get('createdAt'), 'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
'view_count': int_or_none(info.get('plays')), 'view_count': info.get('plays'),
} }

View File

@ -1,31 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class KTHIE(InfoExtractor):
_VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
_TEST = {
'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
'info_dict': {
'id': '0_uoop6oz9',
'ext': 'mp4',
'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3516,
'timestamp': 1647345358,
'upload_date': '20220315',
'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
result = self.url_result(
smuggle_url('kaltura:308:%s' % video_id, {
'service_url': 'https://api.kaltura.nordu.net'}),
'Kaltura')
return result

View File

@ -1,4 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -24,7 +23,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None): def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None fetch_episode = episode is None
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
if len(episode_id) == 7: if episode_id.isdigit():
episode_id = episode_id[:4] + '-' + episode_id[4:] episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video' is_video = m_type == 'video'
@ -85,8 +84,7 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE): class NhkVodIE(NhkBaseIE):
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
@ -126,19 +124,6 @@ class NhkVodIE(NhkBaseIE):
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True, 'only_matching': True,
}, {
# video, alphabetic character in ID #29670
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
'only_matching': True,
'info_dict': {
'id': 'qfjay6cg',
'ext': 'mp4',
'title': 'DESIGN TALKS plus - Fishermens Finery',
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
'upload_date': '20210615',
'timestamp': 1623722008,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
if not stream.get('url'): if not stream.get('url'):
continue continue
yield merge_dicts({ yield merge_dicts({
'format_id': '-'.join((format_id, ext)), 'format_id': '{}-{}'.format(format_id, ext),
'ext': ext, 'ext': ext,
'source_preference': pref, 'source_preference': pref,
'url': urljoin(spl_url, stream['url']), 'url': urljoin(spl_url, stream['url']),

View File

@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116 # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377 # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?' target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
nfunc_and_idx = self._search_regex( nfunc_and_idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ), r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
jscode, 'Initial JS player n function name') jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx') nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx: if not idx:
return nfunc return nfunc
return self._parse_json(self._search_regex( return self._parse_json(self._search_regex(
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode, r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)] 'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url): def _extract_n_function(self, video_id, player_url):