diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
index d29d5366f..352263789 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.md
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1**
+- [ ] I've verified that I'm running youtube-dl version **2020.09.14**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2020.06.16.1
+ [debug] youtube-dl version 2020.09.14
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
index ee882f98c..fa6509be3 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.md
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -19,7 +19,7 @@ labels: 'site-support-request'
- [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1**
+- [ ] I've verified that I'm running youtube-dl version **2020.09.14**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
index 23033fe13..70b0f2f19 100644
--- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -18,13 +18,13 @@ title: ''
- [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1**
+- [ ] I've verified that I'm running youtube-dl version **2020.09.14**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
index 597531330..ec17e4a33 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.md
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1**
+- [ ] I've verified that I'm running youtube-dl version **2020.09.14**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2020.06.16.1
+ [debug] youtube-dl version 2020.09.14
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
index 5cfcb9318..6ac963206 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -19,13 +19,13 @@ labels: 'request'
- [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1**
+- [ ] I've verified that I'm running youtube-dl version **2020.09.14**
- [ ] I've searched the bugtracker for similar feature requests including closed ones
diff --git a/ChangeLog b/ChangeLog
index 07d6ccd69..4143ec2fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,54 @@
+version 2020.09.14
+
+Core
++ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
+ (#25687, #25717)
+
+Extractors
+* [rtlnl] Extend URL regular expression (#26549, #25821)
+* [youtube] Fix empty description extraction (#26575, #26006)
+* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
+* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
+ #26565)
+* [svtplay] Fix id extraction (#26576)
+* [redbulltv] Improve support for redbull.com TV localized URLs (#22063)
++ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
+* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
+
+
+version 2020.09.06
+
+Core
++ [utils] Recognize wav mimetype (#26463)
+
+Extractors
+* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
+* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
+* [youtube:user] Extend URL regular expression (#26443)
+* [xhamster] Improve initials regular expression (#26526, #26353)
+* [svtplay] Fix video id extraction (#26425, #26428, #26438)
+* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
+ #24263, #25010, #25553, #25606)
+ * Switch to GraphQL
+ + Add support for collections
+ + Add support for clips and collections playlists
+* [biqle] Improve video ext extraction
+* [xhamster] Fix extraction (#26157, #26254)
+* [xhamster] Extend URL regular expression (#25789, #25804, #25927)
+
+
+version 2020.07.28
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
+* [youtube] Improve description extraction (#25937, #25980)
+* [wistia] Restrict embed regular expression (#25969)
+* [youtube] Prevent excess HTTP 301 (#25786)
++ [youtube:playlists] Extend URL regular expression (#25810)
++ [bellmedia] Add support for cp24.com clip URLs (#25764)
+* [brightcove] Improve embed detection (#25674)
+
+
version 2020.06.16.1
Extractors
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 35c1050e5..367545a96 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -717,6 +717,8 @@
- **RayWenderlichCourse**
- **RBMARadio**
- **RDS**: RDS.ca
+ - **RedBull**
+ - **RedBullEmbed**
- **RedBullTV**
- **RedBullTVRrnContent**
- **Reddit**
@@ -950,16 +952,13 @@
- **TVPlayHome**
- **Tweakers**
- **TwitCasting**
- - **twitch:chapter**
- **twitch:clips**
- - **twitch:profile**
- **twitch:stream**
- - **twitch:video**
- - **twitch:videos:all**
- - **twitch:videos:highlights**
- - **twitch:videos:past-broadcasts**
- - **twitch:videos:uploads**
- **twitch:vod**
+ - **TwitchCollection**
+ - **TwitchVideos**
+ - **TwitchVideosClips**
+ - **TwitchVideosCollections**
- **twitter**
- **twitter:amplify**
- **twitter:broadcast**
diff --git a/test/test_utils.py b/test/test_utils.py
index 0896f4150..962fd8d75 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -803,6 +803,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
+ self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
+ self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
def test_month_by_name(self):
self.assertEqual(month_by_name(None), None)
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 84bc34928..0f2c06f40 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -141,7 +141,7 @@ class HlsFD(FragmentFD):
count = 0
headers = info_dict.get('http_headers', {})
if byte_range:
- headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
while count <= fragment_retries:
try:
success, frag_content = self._download_fragment(
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 5046878df..6ef26548d 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -106,7 +106,12 @@ class HttpFD(FileDownloader):
set_range(request, range_start, range_end)
# Establish connection
try:
- ctx.data = self.ydl.urlopen(request)
+ try:
+ ctx.data = self.ydl.urlopen(request)
+ except (compat_urllib_error.URLError, ) as err:
+ if isinstance(err.reason, socket.timeout):
+ raise RetryDownload(err)
+ raise err
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
@@ -233,9 +238,11 @@ class HttpFD(FileDownloader):
except socket.timeout as e:
retry(e)
except socket.error as e:
- if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
- raise
- retry(e)
+ # SSLError on python 2 (inherits socket.error) may carry no errno, only this
+ # message. NOTE(review): getattr() below has no default - confirm e.message exists on python 3
+ if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out':
+ retry(e)
+ raise
byte_counter += len(data_block)
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py
index af21e3ee5..17ebbb257 100644
--- a/youtube_dl/extractor/biqle.py
+++ b/youtube_dl/extractor/biqle.py
@@ -3,10 +3,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .vk import VKIE
-from ..utils import (
- HEADRequest,
- int_or_none,
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
)
+from ..utils import int_or_none
class BIQLEIE(InfoExtractor):
@@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor):
if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)
- self._request_webpage(
- HEADRequest(embed_url), video_id, headers={'Referer': url})
- video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+ embed_page = self._download_webpage(
+ embed_url, video_id, headers={'Referer': url})
+ video_ext = self._get_cookies(embed_url).get('video_ext')
+ if video_ext:
+ video_ext = compat_urllib_parse_unquote(video_ext.value)
+ if not video_ext:
+ video_ext = compat_b64decode(self._search_regex(
+ r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
+ embed_page, 'video_ext')).decode()
+ video_id, sig, _, access_token = video_ext.split(':')
item = self._download_json(
'https://api.vk.com/method/video.get', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a61753b17..021945a89 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -10,6 +10,7 @@ import os
import random
import re
import socket
+import ssl
import sys
import time
import math
@@ -67,6 +68,7 @@ from ..utils import (
sanitized_Request,
sanitize_filename,
str_or_none,
+ str_to_int,
strip_or_none,
unescapeHTML,
unified_strdate,
@@ -623,9 +625,12 @@ class InfoExtractor(object):
url_or_request = update_url_query(url_or_request, query)
if data is not None or headers:
url_or_request = sanitized_Request(url_or_request, data, headers)
+ exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+ if hasattr(ssl, 'CertificateError'):
+ exceptions.append(ssl.CertificateError)
try:
return self._downloader.urlopen(url_or_request)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ except tuple(exceptions) as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
@@ -1244,7 +1249,10 @@ class InfoExtractor(object):
interaction_type = is_e.get('interactionType')
if not isinstance(interaction_type, compat_str):
continue
- interaction_count = int_or_none(is_e.get('userInteractionCount'))
+ # For interaction count some sites provide string instead of
+ # an integer (as per spec) with non digit characters (e.g. ",")
+ # so extracting count with more relaxed str_to_int
+ interaction_count = str_to_int(is_e.get('userInteractionCount'))
if interaction_count is None:
continue
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
@@ -1264,6 +1272,7 @@ class InfoExtractor(object):
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
+ 'uploader': str_or_none(e.get('author')),
'filesize': float_or_none(e.get('contentSize')),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..ae7079a6a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -918,7 +918,9 @@ from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redbulltv import (
RedBullTVIE,
+ RedBullEmbedIE,
RedBullTVRrnContentIE,
+ RedBullIE,
)
from .reddit import (
RedditIE,
@@ -1229,14 +1231,11 @@ from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
from .twitcasting import TwitCastingIE
from .twitch import (
- TwitchVideoIE,
- TwitchChapterIE,
TwitchVodIE,
- TwitchProfileIE,
- TwitchAllVideosIE,
- TwitchUploadsIE,
- TwitchPastBroadcastsIE,
- TwitchHighlightsIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
TwitchStreamIE,
TwitchClipsIE,
)
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 589e4d5c3..f2cc57e44 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor):
'id': video_id,
'export': 'download',
})
- urlh = self._request_webpage(
- source_url, video_id, note='Requesting source file',
- errnote='Unable to request source file', fatal=False)
+
+ def request_source_file(source_url, kind):
+ return self._request_webpage(
+ source_url, video_id, note='Requesting %s file' % kind,
+ errnote='Unable to request %s file' % kind, fatal=False)
+ urlh = request_source_file(source_url, 'source')
if urlh:
- def add_source_format(src_url):
+ def add_source_format(urlh):
formats.append({
- 'url': src_url,
+ # Use redirect URLs as download URLs in order to calculate
+ # correct cookies in _calc_cookies.
+ # Using original URLs may result in redirect loop due to
+ # google.com's cookies mistakenly used for googleusercontent.com
+ # redirect URLs (see #23919).
+ 'url': urlh.geturl(),
'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source',
'quality': 1,
})
if urlh.headers.get('Content-Disposition'):
- add_source_format(source_url)
+ add_source_format(urlh)
else:
confirmation_webpage = self._webpage_read_content(
urlh, url, video_id, note='Downloading confirmation page',
@@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor):
r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', fatal=False)
if confirm:
- add_source_format(update_url_query(source_url, {
+ confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
- }))
+ })
+ urlh = request_source_file(confirmed_source_url, 'confirmed source')
+ if urlh and urlh.headers.get('Content-Disposition'):
+ add_source_format(urlh)
if not formats:
reason = self._search_regex(
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 94115534b..84aacbcda 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -11,7 +11,6 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
- JSON_LD_RE,
js_to_json,
NO_DEFAULT,
parse_age_limit,
@@ -425,13 +424,20 @@ class NRKTVEpisodeIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- nrk_id = self._parse_json(
- self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
- display_id)['@id']
-
+ info = self._search_json_ld(webpage, display_id, default={})
+ nrk_id = info.get('@id') or self._html_search_meta(
+ 'nrk:program-id', webpage, default=None) or self._search_regex(
+ r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
+ 'nrk id')
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
- return self.url_result(
- 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
+
+ info.update({
+ '_type': 'url_transparent',
+ 'id': nrk_id,
+ 'url': 'nrk:%s' % nrk_id,
+ 'ie_key': NRKIE.ie_key(),
+ })
+ return info
class NRKTVSerieBaseIE(InfoExtractor):
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3567a3283..529f3f711 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -17,6 +17,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ merge_dicts,
NO_DEFAULT,
orderedSet,
remove_quotes,
@@ -59,13 +60,14 @@ class PornHubIE(PornHubBaseIE):
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
- 'md5': '1e19b41231a02eba417839222ac9d58e',
+ 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
'info_dict': {
'id': '648719015',
'ext': 'mp4',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes',
'upload_date': '20130628',
+ 'timestamp': 1372447216,
'duration': 361,
'view_count': int,
'like_count': int,
@@ -82,8 +84,8 @@ class PornHubIE(PornHubBaseIE):
'id': '1331683002',
'ext': 'mp4',
'title': '重庆婷婷女王足交',
- 'uploader': 'Unknown',
'upload_date': '20150213',
+ 'timestamp': 1423804862,
'duration': 1753,
'view_count': int,
'like_count': int,
@@ -121,6 +123,7 @@ class PornHubIE(PornHubBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'This video has been disabled',
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
@@ -338,10 +341,10 @@ class PornHubIE(PornHubBaseIE):
video_uploader = self._html_search_regex(
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', fatal=False)
+ webpage, 'uploader', default=None)
view_count = self._extract_count(
- r'([\d,\.]+) views', webpage, 'view')
+ r'([\d,\.]+) [Vv]iews', webpage, 'view')
like_count = self._extract_count(
r'([\d,\.]+)', webpage, 'like')
dislike_count = self._extract_count(
@@ -356,7 +359,11 @@ class PornHubIE(PornHubBaseIE):
if div:
return re.findall(r']+\bhref=[^>]+>([^<]+)', div)
- return {
+ info = self._search_json_ld(webpage, video_id, default={})
+ # description provided in JSON-LD is irrelevant
+ info['description'] = None
+
+ return merge_dicts({
'id': video_id,
'uploader': video_uploader,
'upload_date': upload_date,
@@ -372,7 +379,7 @@ class PornHubIE(PornHubBaseIE):
'tags': extract_list('tags'),
'categories': extract_list('categories'),
'subtitles': subtitles,
- }
+ }, info)
class PornHubPlaylistBaseIE(PornHubBaseIE):
diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py
index dbe1aaded..3aae79f5d 100644
--- a/youtube_dl/extractor/redbulltv.py
+++ b/youtube_dl/extractor/redbulltv.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -10,7 +12,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?PAP-\w+)'
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?PAP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@@ -29,8 +31,8 @@ class RedBullTVIE(InfoExtractor):
'id': 'AP-1PMHKJFCW1W11',
'ext': 'mp4',
'title': 'Grime - Hashtags S2E4',
- 'description': 'md5:b5f522b89b72e1e23216e5018810bb25',
- 'duration': 904.6,
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ 'duration': 904,
},
'params': {
'skip_download': True,
@@ -44,11 +46,15 @@ class RedBullTVIE(InfoExtractor):
}, {
'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
+ def extract_info(self, video_id):
session = self._download_json(
'https://api.redbull.tv/v3/session', video_id,
note='Downloading access token', query={
@@ -105,24 +111,119 @@ class RedBullTVIE(InfoExtractor):
'subtitles': subtitles,
}
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.extract_info(video_id)
+
+
+class RedBullEmbedIE(RedBullTVIE):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
+ _TESTS = [{
+ # HLS manifest accessible only using assetId
+ 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT',
+ 'only_matching': True,
+ }]
+ _VIDEO_ESSENSE_TMPL = '''... on %s {
+ videoEssence {
+ attributes
+ }
+ }'''
+
+ def _real_extract(self, url):
+ rrn_id = self._match_id(url)
+ asset_id = self._download_json(
+ 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
+ rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'},
+ query={
+ 'query': '''{
+ resource(id: "%s", enforceGeoBlocking: false) {
+ %s
+ %s
+ }
+}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'),
+ })['data']['resource']['videoEssence']['attributes']['assetId']
+ return self.extract_info(asset_id)
+
class RedBullTVRrnContentIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/tv/(?:video|live|film)/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
'only_matching': True,
}, {
'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ region, lang, rrn_id = re.search(self._VALID_URL, url).groups()
+ rrn_id += ':%s-%s' % (lang, region.upper())
+ return self.url_result(
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
- webpage = self._download_webpage(url, display_id)
- video_url = self._og_search_url(webpage)
+class RedBullIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/(?P(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04',
+ 'md5': 'db8271a7200d40053a1809ed0dd574ff',
+ 'info_dict': {
+ 'id': 'AA-1MT8DQWA91W14',
+ 'ext': 'mp4',
+ 'title': 'Grime - Hashtags S2E4',
+ 'description': 'md5:5546aa612958c08a98faaad4abce484d',
+ },
+ }, {
+ 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+ # only available on the int-en website so a fallback is needed for the API
+ # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero
+ 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia',
+ 'only_matching': True,
+ }]
+ _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr']
+ _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe']
+
+ def _real_extract(self, url):
+ region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups()
+ if filter_type == 'episodes':
+ filter_type = 'episode-videos'
+ elif filter_type == 'live':
+ filter_type = 'live-videos'
+
+ regions = [region.upper()]
+ if region != 'int':
+ if region in self._LAT_FALLBACK_MAP:
+ regions.append('LAT')
+ if lang in self._INT_FALLBACK_LIST:
+ regions.append('INT')
+ locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions])
+
+ rrn_id = self._download_json(
+ 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale,
+ display_id, query={
+ 'filter[type]': filter_type,
+ 'filter[uriSlug]': display_id,
+ 'rb3Schema': 'v1:hero',
+ })['data']['id']
return self.url_result(
- video_url, ie=RedBullTVIE.ie_key(),
- video_id=RedBullTVIE._match_id(video_url))
+ 'https://www.redbull.com/embed/' + rrn_id,
+ RedBullEmbedIE.ie_key(), rrn_id)
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 2d2f6a98c..a1ca791ca 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -15,7 +15,7 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)'
+ _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)'
_TESTS = [{
'url': 'http://www.redtube.com/66418',
'md5': 'fc08071233725f26b8f014dba9590005',
@@ -31,6 +31,9 @@ class RedTubeIE(InfoExtractor):
}, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
+ }, {
+ 'url': 'http://it.redtube.com/66418',
+ 'only_matching': True,
}]
@staticmethod
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index fadca8c17..9eaa06f25 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -14,12 +14,27 @@ class RtlNlIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:(?:www|static)\.)?
(?:
- rtlxl\.nl/[^\#]*\#!/[^/]+/|
- rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
+ rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
+ rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
+ embed\.rtl\.nl/\#uuid=
)
(?P[0-9a-f-]+)'''
_TESTS = [{
+ # new URL schema
+ 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'md5': '490428f1187b60d714f34e1f2e3af0b6',
+ 'info_dict': {
+ 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'timestamp': 1593293400,
+ 'upload_date': '20200627',
+ 'duration': 661.08,
+ },
+ }, {
+ # old URL schema
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
'md5': '473d1946c1fdd050b2c0161a4b13c373',
'info_dict': {
@@ -31,6 +46,7 @@ class RtlNlIE(InfoExtractor):
'upload_date': '20160429',
'duration': 1167.96,
},
+ 'skip': '404',
}, {
# best format available a3t
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
@@ -76,6 +92,10 @@ class RtlNlIE(InfoExtractor):
}, {
'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
'only_matching': True,
+ }, {
+ # new embed URL schema
+ 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index d37c52543..a2fddf6d9 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -558,8 +558,10 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
+ # https://developers.soundcloud.com/blog/offset-pagination-deprecated
COMMON_QUERY = {
- 'limit': 80000,
+ 'limit': 200,
'linked_partitioning': '1',
}
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index 170dce87f..f63a1359a 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -114,7 +114,7 @@ class SRGSSRPlayIE(InfoExtractor):
[^/]+/(?Pvideo|audio)/[^?]+|
popup(?Pvideo|audio)player
)
- \?id=(?P[0-9a-f\-]{36}|\d+)
+ \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P[0-9a-f\-]{36}|\d+)
'''
_TESTS = [{
@@ -175,6 +175,12 @@ class SRGSSRPlayIE(InfoExtractor):
}, {
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
'only_matching': True,
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index e12389cad..2f6887d86 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -224,9 +224,17 @@ class SVTPlayIE(SVTPlayBaseIE):
self._adjust_title(info_dict)
return info_dict
- svt_id = self._search_regex(
- r'