Compare commits

...

5 Commits

Author SHA1 Message Date
dirkf
c7965b9fc2
[NHK] Support alphabetic characters in 7-char NhkVod IDs (#29682) 2022-05-09 18:54:41 +01:00
dirkf
e988fa4523 [doc] Clarify test naming 2022-04-29 16:56:00 +01:00
dirkf
e27d8d819f
[streamcz] Remove empty '{}'.format() for Py2.6
Use `'-join()'` here, or `{0}`, ..., in general.
2022-04-29 13:36:02 +01:00
Árni Dagur
ebc627847c
[KTH] Add new extractor for KTH play (#30885)
* Implement extractor for KTH play
* Make KTH Play url regex more relaxed
2022-04-28 10:18:10 +01:00
dirkf
a0068bd6be [Youtube] Fix "n" descrambling for player fae06c11
Resolves #30856.
2022-04-15 16:07:09 +01:00
7 changed files with 59 additions and 10 deletions

View File

@ -1069,9 +1069,11 @@ After you have ensured this site is distributing its content legally, you can fo
} }
``` ```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. * the test names use the extractor class name **without the trailing `IE`**
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): * tests with `only_matching` key in test's dict are not counted.
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py $ flake8 youtube_dl/extractor/yourextractor.py

View File

@ -557,6 +557,7 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .kth import KTHIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE from .kusi import KUSIIE
from .kuwo import ( from .kuwo import (

View File

@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'), 'duration': info.get('duration'),
'timestamp': info.get('createdAt'), 'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
'view_count': info.get('plays'), 'view_count': int_or_none(info.get('plays')),
} }

View File

@ -0,0 +1,31 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class KTHIE(InfoExtractor):
_VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
_TEST = {
'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
'info_dict': {
'id': '0_uoop6oz9',
'ext': 'mp4',
'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3516,
'timestamp': 1647345358,
'upload_date': '20220315',
'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
result = self.url_result(
smuggle_url('kaltura:308:%s' % video_id, {
'service_url': 'https://api.kaltura.nordu.net'}),
'Kaltura')
return result

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None): def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None fetch_episode = episode is None
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
if episode_id.isdigit(): if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:] episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video' is_video = m_type == 'video'
@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE): class NhkVodIE(NhkBaseIE):
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True, 'only_matching': True,
}, {
# video, alphabetic character in ID #29670
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
'only_matching': True,
'info_dict': {
'id': 'qfjay6cg',
'ext': 'mp4',
'title': 'DESIGN TALKS plus - Fishermens Finery',
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
'upload_date': '20210615',
'timestamp': 1623722008,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
if not stream.get('url'): if not stream.get('url'):
continue continue
yield merge_dicts({ yield merge_dicts({
'format_id': '{}-{}'.format(format_id, ext), 'format_id': '-'.join((format_id, ext)),
'ext': ext, 'ext': ext,
'source_preference': pref, 'source_preference': pref,
'url': urljoin(spl_url, stream['url']), 'url': urljoin(spl_url, stream['url']),

View File

@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116 # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377 # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?' target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
nfunc_and_idx = self._search_regex( nfunc_and_idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ), r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
jscode, 'Initial JS player n function name') jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx') nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx: if not idx:
return nfunc return nfunc
return self._parse_json(self._search_regex( return self._parse_json(self._search_regex(
r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode, r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)] 'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url): def _extract_n_function(self, video_id, player_url):