mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-01-08 12:30:10 +09:00
. (#1)
* [extractor/bilibili] Fix path "/audio/auxxxxx" download return 403 * [RAI] Added checks for DRM protected content (PR #27657) reviewed by pukkandan (https://github.com/yt-dlp/yt-dlp/pull/150) * [RAI] Extend formats with direct http mp4 link (PR #27990) * initial support for creating direct mp4 link * improved regexes and info extraction * added "connection: close" to request headers * updated to https://github.com/yt-dlp/yt-dlp/pull/208 * [RAI] Fix extraction of http formats From https://github.com/yt-dlp/yt-dlp/pull/3272 Closes https://github.com/yt-dlp/yt-dlp/issues/3270 Authored by: nixxo * Disable blank issues * [Youtube] Fix "n" descrambling for player fae06c11 Resolves #30856. * [KTH] Add new extractor for KTH play (#30885) * Implement extractor for KTH play * Make KTH Play url regex more relaxed * [streamcz] Remove empty `'{}'.format()` for Py2.6 Use `'-'.join()` here, or `{0}`, ..., in general. * [doc] Clarify test naming Co-authored-by: lihan7 <lihan7@xiaomi.com> Co-authored-by: nixxo <nixxo@protonmail.com> Co-authored-by: dirkf <fieldhouse@gmx.net> Co-authored-by: Árni Dagur <arni@dagur.eu>
This commit is contained in:
parent
483f6558c8
commit
e484c4dbc9
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
blank_issues_enabled: false
|
@ -1075,9 +1075,11 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
* the test names use the extractor class name **without the trailing `IE`**
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
* tests with `only_matching` key in test's dict are not counted.
|
||||||
|
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
|
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
|||||||
'filesize': int_or_none(play_data.get('size')),
|
'filesize': int_or_none(play_data.get('size')),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
for a_format in formats:
|
||||||
|
a_format.setdefault('http_headers', {}).update({
|
||||||
|
'Referer': url,
|
||||||
|
})
|
||||||
|
|
||||||
song = self._call_api('song/info', au_id)
|
song = self._call_api('song/info', au_id)
|
||||||
title = song['title']
|
title = song['title']
|
||||||
statistic = song.get('statistic') or {}
|
statistic = song.get('statistic') or {}
|
||||||
|
@ -557,6 +557,7 @@ from .kinja import KinjaEmbedIE
|
|||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
|
from .kth import KTHIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .kusi import KUSIIE
|
from .kusi import KUSIIE
|
||||||
from .kuwo import (
|
from .kuwo import (
|
||||||
|
@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
|
|||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
'timestamp': info.get('createdAt'),
|
'timestamp': info.get('createdAt'),
|
||||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||||
'view_count': info.get('plays'),
|
'view_count': int_or_none(info.get('plays')),
|
||||||
}
|
}
|
||||||
|
31
youtube_dl/extractor/kth.py
Normal file
31
youtube_dl/extractor/kth.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
|
class KTHIE(InfoExtractor):
    """Extractor for media pages on play.kth.se.

    No media is fetched here: the id taken from the page URL is wrapped in a
    smuggled ``kaltura:`` URL and handed over to the ``Kaltura`` extractor.
    """

    # One or more leading path segments, then the id segment that is captured.
    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }

    def _real_extract(self, url):
        """Return a URL result that delegates `url` to the Kaltura extractor."""
        entry_id = self._match_id(url)
        # NOTE(review): 308 is presumably the Kaltura partner id used by KTH;
        # the smuggled service_url overrides the API host - confirm against
        # the Kaltura extractor.
        kaltura_url = smuggle_url(
            'kaltura:308:%s' % entry_id,
            {'service_url': 'https://api.kaltura.nordu.net'})
        return self.url_result(kaltura_url, 'Kaltura')
|
@ -5,15 +5,16 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_start,
|
remove_start,
|
||||||
@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
if not formats and geoprotection is True:
|
if not formats and geoprotection is True:
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
formats.extend(self._create_http_urls(relinker_url, formats))
|
||||||
|
|
||||||
return dict((k, v) for k, v in {
|
return dict((k, v) for k, v in {
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}.items() if v is not None)
|
}.items() if v is not None)
|
||||||
|
|
||||||
|
def _create_http_urls(self, relinker_url, fmts):
    """Build direct HTTP MP4 formats for a relinker URL.

    A HEAD request is made for the relinker URL with the
    ``overrideUserAgentRule=mp4-*`` parameter appended; unless that yields
    a redirect target, no formats are produced. The redirect target of the
    plain relinker URL is then parsed for the list of available qualities,
    and one format dict is built per quality.

    @param relinker_url  relinker URL of the media
    @param fmts          formats already extracted (e.g. from m3u8); used
                         to copy codec/size details onto the MP4 formats
    @returns             list of format dicts, possibly empty
    """
    _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
    _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
    _QUALITY = {
        # tbr: w, h
        '250': [352, 198],
        '400': [512, 288],
        '700': [512, 288],
        '800': [700, 394],
        '1200': [736, 414],
        '1800': [1024, 576],
        '2400': [1280, 720],
        '3200': [1440, 810],
        '3600': [1440, 810],
        '5000': [1920, 1080],
        '10000': [1920, 1080],
    }

    def test_url(url):
        """HEAD `url`: redirect target on 200, False on failure or when
        the URL is unchanged, None for any other status."""
        resp = self._request_webpage(
            HEADRequest(url), None, headers={'User-Agent': 'Rai'},
            fatal=False, errnote=False, note=False)

        if resp is False:
            return False

        if resp.code == 200:
            return False if resp.url == url else resp.url
        return None

    def get_format_info(tbr):
        """Return the metadata fields for the quality label `tbr`."""
        br = int_or_none(tbr)
        if len(fmts) == 1 and not br:
            br = fmts[0].get('tbr')
        # br may still be None here (label '*' and no single format to
        # borrow a bitrate from): comparing None with an int raises
        # TypeError on Python 3, so fall back to the lowest quality.
        if br and br > 300:
            # integer arithmetic: math.floor() returns a float on
            # Python 2, which would give a label like '1800.0'
            tbr = compat_str(int(br // 100) * 100)
        else:
            tbr = '250'

        # try extracting info from available m3u8 formats
        format_copy = None
        if br:
            br_limit = int(br // 100)
            for f in fmts:
                if not f.get('tbr'):
                    continue
                if br_limit - 1 <= int(f['tbr'] // 100) <= br_limit + 1:
                    format_copy = f.copy()

        if format_copy:
            return {
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
                'tbr': format_copy.get('tbr'),
                'vcodec': format_copy.get('vcodec'),
                'acodec': format_copy.get('acodec'),
                'fps': format_copy.get('fps'),
                'format_id': 'https-%s' % tbr,
            }

        # bitrates missing from the table get no dimensions rather than
        # aborting the whole extraction with a KeyError
        width, height = _QUALITY.get(tbr, (None, None))
        return {
            'width': width,
            'height': height,
            'format_id': 'https-%s' % tbr,
            'tbr': int(tbr),
        }

    loc = test_url(_MP4_TMPL % (relinker_url, '*'))
    if not isinstance(loc, compat_str):
        return []

    mobj = re.match(
        _RELINKER_REG,
        test_url(relinker_url) or '')
    if not mobj:
        return []

    available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
    # a trailing comma in the quality list leaves an empty entry
    available_qualities = [i for i in available_qualities if i]

    formats = []
    for q in available_qualities:
        fmt = {
            'url': _MP4_TMPL % (relinker_url, q),
            'protocol': 'https',
            'ext': 'mp4',
        }
        fmt.update(get_format_info(q))
        formats.append(fmt)
    return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_subtitles(url, video_data):
|
def _extract_subtitles(url, video_data):
|
||||||
STL_EXT = 'stl'
|
STL_EXT = 'stl'
|
||||||
@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# 1080p direct mp4 url
|
||||||
|
'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
|
||||||
|
'md5': '2e501e8651d72f05ffe8f5d286ad560b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Leonardo - S1E1',
|
||||||
|
'alt_title': 'St 1 Ep 1 - Episodio 1',
|
||||||
|
'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Rai 1',
|
||||||
|
'duration': 3229,
|
||||||
|
'series': 'Leonardo',
|
||||||
|
'season': 'Season 1',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -158,6 +263,10 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
# subtitles at 'subtitlesArray' key (see #27698)
|
# subtitles at 'subtitlesArray' key (see #27698)
|
||||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# DRM protected
|
||||||
|
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -166,6 +275,13 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
base + '.json', video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
if try_get(
|
||||||
|
media,
|
||||||
|
(lambda x: x['rights_management']['rights']['drm'],
|
||||||
|
lambda x: x['program_info']['rights_management']['rights']['drm']),
|
||||||
|
dict):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# with ContentItem in og:url
|
# with ContentItem in og:url
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||||
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
'md5': '06345bd97c932f19ffb129973d07a020',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
|
|||||||
if not stream.get('url'):
|
if not stream.get('url'):
|
||||||
continue
|
continue
|
||||||
yield merge_dicts({
|
yield merge_dicts({
|
||||||
'format_id': '{}-{}'.format(format_id, ext),
|
'format_id': '-'.join((format_id, ext)),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'source_preference': pref,
|
'source_preference': pref,
|
||||||
'url': urljoin(spl_url, stream['url']),
|
'url': urljoin(spl_url, stream['url']),
|
||||||
|
@ -1464,15 +1464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
||||||
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
|
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
|
||||||
nfunc_and_idx = self._search_regex(
|
nfunc_and_idx = self._search_regex(
|
||||||
r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
|
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
|
||||||
jscode, 'Initial JS player n function name')
|
jscode, 'Initial JS player n function name')
|
||||||
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
|
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
|
||||||
if not idx:
|
if not idx:
|
||||||
return nfunc
|
return nfunc
|
||||||
return self._parse_json(self._search_regex(
|
return self._parse_json(self._search_regex(
|
||||||
r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
|
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
|
||||||
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
|
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
def _extract_n_function(self, video_id, player_url):
|
def _extract_n_function(self, video_id, player_url):
|
||||||
|
Loading…
Reference in New Issue
Block a user