Compare commits

..

5 Commits

Author SHA1 Message Date
dirkf
c7965b9fc2
[NHK] Support alphabetic characters in 7-char NhkVod IDs (#29682) 2022-05-09 18:54:41 +01:00
dirkf
e988fa4523 [doc] Clarify test naming 2022-04-29 16:56:00 +01:00
dirkf
e27d8d819f
[streamcz] Remove empty '{}'.format() for Py2.6
Use `'-join()'` here, or `{0}`, ..., in general.
2022-04-29 13:36:02 +01:00
Árni Dagur
ebc627847c
[KTH] Add new extractor for KTH play (#30885)
* Implement extractor for KTH play
* Make KTH Play url regex more relaxed
2022-04-28 10:18:10 +01:00
dirkf
a0068bd6be [Youtube] Fix "n" descrambling for player fae06c11
Resolves #30856.
2022-04-15 16:07:09 +01:00
7 changed files with 59 additions and 10 deletions

View File

@ -1069,9 +1069,11 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
* the test names use the extractor class name **without the trailing `IE`**
* tests with `only_matching` key in test's dict are not counted.
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py

View File

@ -557,6 +557,7 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE
from .krasview import KrasViewIE
from .kth import KTHIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
from .kuwo import (

View File

@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
'view_count': info.get('plays'),
'view_count': int_or_none(info.get('plays')),
}

View File

@ -0,0 +1,31 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class KTHIE(InfoExtractor):
_VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
_TEST = {
'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
'info_dict': {
'id': '0_uoop6oz9',
'ext': 'mp4',
'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3516,
'timestamp': 1647345358,
'upload_date': '20220315',
'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
result = self.url_result(
smuggle_url('kaltura:308:%s' % video_id, {
'service_url': 'https://api.kaltura.nordu.net'}),
'Kaltura')
return result

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import re
@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
if episode_id.isdigit():
if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video'
@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True,
}, {
# video, alphabetic character in ID #29670
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
'only_matching': True,
'info_dict': {
'id': 'qfjay6cg',
'ext': 'mp4',
'title': 'DESIGN TALKS plus - Fishermens Finery',
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
'upload_date': '20210615',
'timestamp': 1623722008,
}
}]
def _real_extract(self, url):

View File

@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
if not stream.get('url'):
continue
yield merge_dicts({
'format_id': '{}-{}'.format(format_id, ext),
'format_id': '-'.join((format_id, ext)),
'ext': ext,
'source_preference': pref,
'url': urljoin(spl_url, stream['url']),

View File

@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode):
target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
nfunc_and_idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx:
return nfunc
return self._parse_json(self._search_regex(
r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url):