From e450f6cb634f17fd4ef59291eafb68b05c141e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Jul 2020 05:04:50 +0700 Subject: [PATCH 01/41] [youtube] Fix sigfunc name extraction (closes #26134, closes #26135, closes #26136, closes #26137) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c27f2cd95..b35bf03aa 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1384,7 +1384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): funcname = self._search_regex( (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\b(?P[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', From 570611955ff76b7140c14f5bfc842d452f40d357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Jul 2020 05:07:54 +0700 Subject: [PATCH 02/41] [ChangeLog] Actualize [ci skip] --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 07d6ccd69..a49904c89 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version + +Extractors +* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137) +* [youtube] Improve description extraction (#25937, #25980) +* [wistia] Restrict embed regular expression (#25969) +* [youtube] Prevent excess HTTP 301 (#25786) ++ [youtube:playlists] Extend URL regular expression (#25810) ++ [bellmedia] Add support for cp24.com clip URLs (#25764) +* [brightcove] Improve embed detection (#25674) + + version 2020.06.16.1 Extractors From a4ed50bb84658b7e77cbb37597c36fa62a9acd4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Jul 2020 05:13:03 +0700 Subject: [PATCH 03/41] release 2020.07.28 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d29d5366f..f2260db46 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.06.16.1 + [debug] youtube-dl version 2020.07.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index ee882f98c..8bc05c4ba 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 23033fe13..98348e0cd 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 597531330..86706f528 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.06.16.1 + [debug] youtube-dl version 2020.07.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 5cfcb9318..52c2709f9 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.06.16.1** +- [ ] I've verified that I'm running youtube-dl version **2020.07.28** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index a49904c89..bf515f784 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.07.28 Extractors * [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6b88eb38c..17101fa47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.06.16.1' +__version__ = '2020.07.28' From 6cb30ea5eddb2db4a2536d1b851f4cc45f427d3c Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Wed, 12 Aug 2020 10:37:22 -0400 Subject: [PATCH 04/41] [xhamster] Extend _VALID_URL (closes #25789) (#25804) --- youtube_dl/extractor/xhamster.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 0f7be6a7d..72ce5e1d3 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -25,8 +25,8 @@ class XHamsterIE(InfoExtractor): https?:// (?:.+?\.)?%s/ (?: - movies/(?P\d+)/(?P[^/]*)\.html| - videos/(?P[^/]*)-(?P\d+) + movies/(?P[\dA-Za-z]+)/(?P[^/]*)\.html| + videos/(?P[^/]*)-(?P[\dA-Za-z]+) ) ''' % _DOMAINS _TESTS = [{ @@ -105,6 +105,9 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 'only_matching': True, + }, { + 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx', + 'only_matching': True, }] def _real_extract(self, url): From a7e348556a213f008758011777a0a25759efc2e8 Mon Sep 17 00:00:00 2001 From: TheRealDude2 Date: Wed, 12 Aug 2020 16:42:17 +0200 Subject: [PATCH 05/41] [xhamster] Fix extraction (closes #26157) (#26254) --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 72ce5e1d3..babe31739 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -132,7 +132,7 @@ class XHamsterIE(InfoExtractor): initials = self._parse_json( self._search_regex( - r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials', + r'window\.initials\s*=\s*({.+?})\s*;', webpage, 'initials', default='{}'), video_id, fatal=False) if initials: From 10709fc7c640fcd2f4866090d68f130fc8d9ad0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Aug 2020 21:51:06 +0700 Subject: [PATCH 06/41] [xhamster] Extend _VALID_URL (closes #25927) --- youtube_dl/extractor/xhamster.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index babe31739..902a3ed33 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -20,7 +20,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)' + _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)' _VALID_URL = r'''(?x) https?:// (?:.+?\.)?%s/ @@ -99,6 +99,12 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'only_matching': True, + }, { + 'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'only_matching': True, + }, { + 'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', + 'only_matching': True, }, { 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 'only_matching': True, From f5863a3ea08492bd9fc04c55e1e912d24e92d49b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 27 Aug 2020 19:20:41 +0100 Subject: [PATCH 07/41] [biqle] improve video_ext extraction --- youtube_dl/extractor/biqle.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index af21e3ee5..17ebbb257 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -3,10 +3,11 @@ from __future__ import unicode_literals from .common import InfoExtractor from .vk import VKIE -from ..utils import ( - HEADRequest, - int_or_none, +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, ) +from ..utils import int_or_none class BIQLEIE(InfoExtractor): @@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor): if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) - self._request_webpage( - HEADRequest(embed_url), video_id, headers={'Referer': url}) - video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A') + embed_page = self._download_webpage( + embed_url, video_id, headers={'Referer': url}) + video_ext = self._get_cookies(embed_url).get('video_ext') + if video_ext: + video_ext = compat_urllib_parse_unquote(video_ext.value) + if not video_ext: + video_ext = compat_b64decode(self._search_regex( + r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', + embed_page, 'video_ext')).decode() + video_id, sig, _, access_token = video_ext.split(':') item = self._download_json( 'https://api.vk.com/method/video.get', video_id, headers={'User-Agent': 'okhttp/3.4.1'}, query={ From 841b683804c6f706554bf7607e52072575358445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 10:45:34 +0700 Subject: [PATCH 08/41] [twitch] Rework extractors (closes #12297, closes #20414, closes #20604, closes #21811, closes #21812, closes #22979, closes #24263, closes #25010, closes #25553, closes #25606) * Switch to GraphQL. + Add support for collections. + Add support for clips and collections playlists. --- youtube_dl/extractor/extractors.py | 11 +- youtube_dl/extractor/twitch.py | 674 ++++++++++++++++++----------- 2 files changed, 429 insertions(+), 256 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4b3092028..9564465a0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1229,14 +1229,11 @@ from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import TwitCastingIE from .twitch import ( - TwitchVideoIE, - TwitchChapterIE, TwitchVodIE, - TwitchProfileIE, - TwitchAllVideosIE, - TwitchUploadsIE, - TwitchPastBroadcastsIE, - TwitchHighlightsIE, + TwitchCollectionIE, + TwitchVideosIE, + TwitchVideosClipsIE, + TwitchVideosCollectionsIE, TwitchStreamIE, TwitchClipsIE, ) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index e211cd4c8..eadc48c6d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -1,24 +1,26 @@ # coding: utf-8 from __future__ import unicode_literals +import collections import itertools -import re -import random import json +import random +import re from .common import InfoExtractor from ..compat import ( compat_kwargs, compat_parse_qs, compat_str, + compat_urlparse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( clean_html, ExtractorError, + float_or_none, int_or_none, - orderedSet, parse_duration, parse_iso8601, qualities, @@ -150,120 +152,16 @@ class TwitchBaseIE(InfoExtractor): }) self._sort_formats(formats) + def _download_access_token(self, channel_name): + return self._call_api( + 'api/channels/%s/access_token' % channel_name, channel_name, + 'Downloading access token JSON') -class TwitchItemBaseIE(TwitchBaseIE): - def _download_info(self, item, item_id): - return self._extract_info(self._call_api( - 'kraken/videos/%s%s' % (item, item_id), item_id, - 'Downloading %s info JSON' % self._ITEM_TYPE)) - - def _extract_media(self, item_id): - info = self._download_info(self._ITEM_SHORTCUT, item_id) - response = self._call_api( - 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, - 'Downloading %s playlist JSON' % self._ITEM_TYPE) - entries = [] - chunks = response['chunks'] - qualities = list(chunks.keys()) - for num, fragment in enumerate(zip(*chunks.values()), start=1): - formats = [] - for fmt_num, fragment_fmt in enumerate(fragment): - format_id = qualities[fmt_num] - fmt = { - 'url': fragment_fmt['url'], - 'format_id': format_id, - 'quality': 1 if format_id == 'live' else 0, - } - m = re.search(r'^(?P\d+)[Pp]', format_id) - if m: - fmt['height'] = int(m.group('height')) - formats.append(fmt) - self._sort_formats(formats) - entry = dict(info) - entry['id'] = '%s_%d' % (entry['id'], num) - entry['title'] = '%s part %d' % (entry['title'], num) - entry['formats'] = formats - entries.append(entry) - return self.playlist_result(entries, info['id'], info['title']) - - def _extract_info(self, info): - status = info.get('status') - if status == 'recording': - is_live = True - elif status == 'recorded': - is_live = False - else: - is_live = None - _QUALITIES = ('small', 'medium', 'large') - quality_key = qualities(_QUALITIES) - thumbnails = [] - preview = info.get('preview') - if isinstance(preview, dict): - for thumbnail_id, thumbnail_url in preview.items(): - thumbnail_url = url_or_none(thumbnail_url) - if not thumbnail_url: - continue - if thumbnail_id not in _QUALITIES: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'preference': quality_key(thumbnail_id), - }) - return { - 'id': info['_id'], - 'title': info.get('title') or 'Untitled Broadcast', - 'description': info.get('description'), - 'duration': int_or_none(info.get('length')), - 'thumbnails': thumbnails, - 'uploader': info.get('channel', {}).get('display_name'), - 'uploader_id': info.get('channel', {}).get('name'), - 'timestamp': parse_iso8601(info.get('recorded_at')), - 'view_count': int_or_none(info.get('views')), - 'is_live': is_live, - } - - def _real_extract(self, url): - return self._extract_media(self._match_id(url)) + def _extract_channel_id(self, token, channel_name): + return compat_str(self._parse_json(token, channel_name)['channel_id']) -class TwitchVideoIE(TwitchItemBaseIE): - IE_NAME = 'twitch:video' - _VALID_URL = r'%s/[^/]+/b/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE - _ITEM_TYPE = 'video' - _ITEM_SHORTCUT = 'a' - - _TEST = { - 'url': 'http://www.twitch.tv/riotgames/b/577357806', - 'info_dict': { - 'id': 'a577357806', - 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG', - }, - 'playlist_mincount': 12, - 'skip': 'HTTP Error 404: Not Found', - } - - -class TwitchChapterIE(TwitchItemBaseIE): - IE_NAME = 'twitch:chapter' - _VALID_URL = r'%s/[^/]+/c/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE - _ITEM_TYPE = 'chapter' - _ITEM_SHORTCUT = 'c' - - _TESTS = [{ - 'url': 'http://www.twitch.tv/acracingleague/c/5285812', - 'info_dict': { - 'id': 'c5285812', - 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', - }, - 'playlist_mincount': 3, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', - 'only_matching': True, - }] - - -class TwitchVodIE(TwitchItemBaseIE): +class TwitchVodIE(TwitchBaseIE): IE_NAME = 'twitch:vod' _VALID_URL = r'''(?x) https?:// @@ -332,17 +230,60 @@ class TwitchVodIE(TwitchItemBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - item_id = self._match_id(url) + def _download_info(self, item_id): + return self._extract_info( + self._call_api( + 'kraken/videos/%s' % item_id, item_id, + 'Downloading video info JSON')) - info = self._download_info(self._ITEM_SHORTCUT, item_id) + @staticmethod + def _extract_info(info): + status = info.get('status') + if status == 'recording': + is_live = True + elif status == 'recorded': + is_live = False + else: + is_live = None + _QUALITIES = ('small', 'medium', 'large') + quality_key = qualities(_QUALITIES) + thumbnails = [] + preview = info.get('preview') + if isinstance(preview, dict): + for thumbnail_id, thumbnail_url in preview.items(): + thumbnail_url = url_or_none(thumbnail_url) + if not thumbnail_url: + continue + if thumbnail_id not in _QUALITIES: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'preference': quality_key(thumbnail_id), + }) + return { + 'id': info['_id'], + 'title': info.get('title') or 'Untitled Broadcast', + 'description': info.get('description'), + 'duration': int_or_none(info.get('length')), + 'thumbnails': thumbnails, + 'uploader': info.get('channel', {}).get('display_name'), + 'uploader_id': info.get('channel', {}).get('name'), + 'timestamp': parse_iso8601(info.get('recorded_at')), + 'view_count': int_or_none(info.get('views')), + 'is_live': is_live, + } + + def _real_extract(self, url): + vod_id = self._match_id(url) + + info = self._download_info(vod_id) access_token = self._call_api( - 'api/vods/%s/access_token' % item_id, item_id, + 'api/vods/%s/access_token' % vod_id, vod_id, 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( '%s/vod/%s.m3u8?%s' % ( - self._USHER_BASE, item_id, + self._USHER_BASE, vod_id, compat_urllib_parse_urlencode({ 'allow_source': 'true', 'allow_audio_only': 'true', @@ -352,7 +293,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), - item_id, 'mp4', entry_protocol='m3u8_native') + vod_id, 'mp4', entry_protocol='m3u8_native') self._prefer_source(formats) info['formats'] = formats @@ -366,7 +307,7 @@ class TwitchVodIE(TwitchItemBaseIE): info['subtitles'] = { 'rechat': [{ 'url': update_url_query( - 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, { + 'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, { 'client_id': self._CLIENT_ID, }), 'ext': 'json', @@ -376,164 +317,405 @@ class TwitchVodIE(TwitchItemBaseIE): return info -class TwitchPlaylistBaseIE(TwitchBaseIE): - _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d' +def _make_video_result(node): + assert isinstance(node, dict) + video_id = node.get('id') + if not video_id: + return + return { + '_type': 'url_transparent', + 'ie_key': TwitchVodIE.ie_key(), + 'id': video_id, + 'url': 'https://www.twitch.tv/videos/%s' % video_id, + 'title': node.get('title'), + 'thumbnail': node.get('previewThumbnailURL'), + 'duration': float_or_none(node.get('lengthSeconds')), + 'view_count': int_or_none(node.get('viewCount')), + } + + +class TwitchGraphQLBaseIE(TwitchBaseIE): _PAGE_LIMIT = 100 - def _extract_playlist(self, channel_id): - info = self._call_api( - 'kraken/channels/%s' % channel_id, - channel_id, 'Downloading channel info JSON') - channel_name = info.get('display_name') or info.get('name') + def _download_gql(self, video_id, op, variables, sha256_hash, note, fatal=True): + return self._download_json( + 'https://gql.twitch.tv/gql', video_id, note, + data=json.dumps({ + 'operationName': op, + 'variables': variables, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': sha256_hash, + } + } + }).encode(), + headers={ + 'Content-Type': 'text/plain;charset=UTF-8', + 'Client-ID': self._CLIENT_ID, + }, fatal=fatal) + + +class TwitchCollectionIE(TwitchGraphQLBaseIE): + _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P[^/]+)' + + _TESTS = [{ + 'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ', + 'info_dict': { + 'id': 'wlDCoH0zEBZZbQ', + 'title': 'Overthrow Nook, capitalism for children', + }, + 'playlist_mincount': 13, + }] + + _OPERATION_NAME = 'CollectionSideBar' + _SHA256_HASH = '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14' + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._download_gql( + collection_id, self._OPERATION_NAME, + {'collectionID': collection_id}, self._SHA256_HASH, + 'Downloading collection GraphQL')['data']['collection'] + title = collection.get('title') entries = [] + for edge in collection['items']['edges']: + if not isinstance(edge, dict): + continue + node = edge.get('node') + if not isinstance(node, dict): + continue + video = _make_video_result(node) + if video: + entries.append(video) + return self.playlist_result( + entries, playlist_id=collection_id, playlist_title=title) + + +class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): + def _entries(self, channel_name, *args): + cursor = None + variables_common = self._make_variables(channel_name, *args) + entries_key = '%ss' % self._ENTRY_KIND + for page_num in itertools.count(1): + variables = variables_common.copy() + variables['limit'] = self._PAGE_LIMIT + if cursor: + variables['cursor'] = cursor + page = self._download_gql( + channel_name, self._OPERATION_NAME, variables, + self._SHA256_HASH, + 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num), + fatal=False) + if not page: + break + edges = try_get( + page, lambda x: x['data']['user'][entries_key]['edges'], list) + if not edges: + break + for edge in edges: + if not isinstance(edge, dict): + continue + if edge.get('__typename') != self._EDGE_KIND: + continue + node = edge.get('node') + if not isinstance(node, dict): + continue + if node.get('__typename') != self._NODE_KIND: + continue + entry = self._extract_entry(node) + if entry: + cursor = edge.get('cursor') + yield entry + if not cursor or not isinstance(cursor, compat_str): + break + + # Deprecated kraken v5 API + def _entries_kraken(self, channel_name, broadcast_type, sort): + access_token = self._download_access_token(channel_name) + channel_id = self._extract_channel_id(access_token['token'], channel_name) offset = 0 - limit = self._PAGE_LIMIT - broken_paging_detected = False counter_override = None for counter in itertools.count(1): response = self._call_api( - self._PLAYLIST_PATH % (channel_id, offset, limit), + 'kraken/channels/%s/videos/' % channel_id, channel_id, - 'Downloading %s JSON page %s' - % (self._PLAYLIST_TYPE, counter_override or counter)) - page_entries = self._extract_playlist_page(response) - if not page_entries: + 'Downloading video JSON page %s' % (counter_override or counter), + query={ + 'offset': offset, + 'limit': self._PAGE_LIMIT, + 'broadcast_type': broadcast_type, + 'sort': sort, + }) + videos = response.get('videos') + if not isinstance(videos, list): break + for video in videos: + if not isinstance(video, dict): + continue + video_url = url_or_none(video.get('url')) + if not video_url: + continue + yield { + '_type': 'url_transparent', + 'ie_key': TwitchVodIE.ie_key(), + 'id': video.get('_id'), + 'url': video_url, + 'title': video.get('title'), + 'description': video.get('description'), + 'timestamp': unified_timestamp(video.get('published_at')), + 'duration': float_or_none(video.get('length')), + 'view_count': int_or_none(video.get('views')), + 'language': video.get('language'), + } + offset += self._PAGE_LIMIT total = int_or_none(response.get('_total')) - # Since the beginning of March 2016 twitch's paging mechanism - # is completely broken on the twitch side. It simply ignores - # a limit and returns the whole offset number of videos. - # Working around by just requesting all videos at once. - # Upd: pagination bug was fixed by twitch on 15.03.2016. - if not broken_paging_detected and total and len(page_entries) > limit: - self.report_warning( - 'Twitch pagination is broken on twitch side, requesting all videos at once', - channel_id) - broken_paging_detected = True - offset = total - counter_override = '(all at once)' - continue - entries.extend(page_entries) - if broken_paging_detected or total and len(page_entries) >= total: + if total and offset >= total: break - offset += limit - return self.playlist_result( - [self._make_url_result(entry) for entry in orderedSet(entries)], - channel_id, channel_name) - - def _make_url_result(self, url): - try: - video_id = 'v%s' % TwitchVodIE._match_id(url) - return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id) - except AssertionError: - return self.url_result(url) - - def _extract_playlist_page(self, response): - videos = response.get('videos') - return [video['url'] for video in videos] if videos else [] - - def _real_extract(self, url): - return self._extract_playlist(self._match_id(url)) -class TwitchProfileIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:profile' - _VALID_URL = r'%s/(?P[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_TYPE = 'profile' +class TwitchVideosIE(TwitchPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P[^/]+)/(?:videos|profile)' _TESTS = [{ - 'url': 'http://www.twitch.tv/vanillatv/profile', - 'info_dict': { - 'id': 'vanillatv', - 'title': 'VanillaTV', - }, - 'playlist_mincount': 412, - }, { - 'url': 'http://m.twitch.tv/vanillatv/profile', - 'only_matching': True, - }] - - -class TwitchVideosBaseIE(TwitchPlaylistBaseIE): - _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' - - -class TwitchAllVideosIE(TwitchVideosBaseIE): - IE_NAME = 'twitch:videos:all' - _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE - _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' - _PLAYLIST_TYPE = 'all videos' - - _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/all', + # All Videos sorted by Date + 'url': 'https://www.twitch.tv/spamfish/videos?filter=all', 'info_dict': { 'id': 'spamfish', - 'title': 'Spamfish', + 'title': 'spamfish - All Videos sorted by Date', }, - 'playlist_mincount': 869, + 'playlist_mincount': 924, + }, { + # All Videos sorted by Popular + 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views', + 'info_dict': { + 'id': 'spamfish', + 'title': 'spamfish - All Videos sorted by Popular', + }, + 'playlist_mincount': 931, + }, { + # Past Broadcasts sorted by Date + 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives', + 'info_dict': { + 'id': 'spamfish', + 'title': 'spamfish - Past Broadcasts sorted by Date', + }, + 'playlist_mincount': 27, + }, { + # Highlights sorted by Date + 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights', + 'info_dict': { + 'id': 'spamfish', + 'title': 'spamfish - Highlights sorted by Date', + }, + 'playlist_mincount': 901, + }, { + # Uploads sorted by Date + 'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time', + 'info_dict': { + 'id': 'esl_csgo', + 'title': 'esl_csgo - Uploads sorted by Date', + }, + 'playlist_mincount': 5, + }, { + # Past Premieres sorted by Date + 'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres', + 'info_dict': { + 'id': 'spamfish', + 'title': 'spamfish - Past Premieres sorted by Date', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://www.twitch.tv/spamfish/videos/all', + 'only_matching': True, }, { 'url': 'https://m.twitch.tv/spamfish/videos/all', 'only_matching': True, - }] - - -class TwitchUploadsIE(TwitchVideosBaseIE): - IE_NAME = 'twitch:videos:uploads' - _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE - _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' - _PLAYLIST_TYPE = 'uploads' - - _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/uploads', - 'info_dict': { - 'id': 'spamfish', - 'title': 'Spamfish', - }, - 'playlist_mincount': 0, }, { - 'url': 'https://m.twitch.tv/spamfish/videos/uploads', + 'url': 'https://www.twitch.tv/spamfish/videos', 'only_matching': True, }] + Broadcast = collections.namedtuple('Broadcast', ['type', 'label']) -class TwitchPastBroadcastsIE(TwitchVideosBaseIE): - IE_NAME = 'twitch:videos:past-broadcasts' - _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE - _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' - _PLAYLIST_TYPE = 'past broadcasts' + _DEFAULT_BROADCAST = Broadcast(None, 'All Videos') + _BROADCASTS = { + 'archives': Broadcast('ARCHIVE', 'Past Broadcasts'), + 'highlights': Broadcast('HIGHLIGHT', 'Highlights'), + 'uploads': Broadcast('UPLOAD', 'Uploads'), + 'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'), + 'all': _DEFAULT_BROADCAST, + } + + _DEFAULT_SORTED_BY = 'Date' + _SORTED_BY = { + 'time': _DEFAULT_SORTED_BY, + 'views': 'Popular', + } + + _SHA256_HASH = 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb' + _OPERATION_NAME = 'FilterableVideoTower_Videos' + _ENTRY_KIND = 'video' + _EDGE_KIND = 'VideoEdge' + _NODE_KIND = 'Video' + + @classmethod + def suitable(cls, url): + return (False + if any(ie.suitable(url) for ie in ( + TwitchVideosClipsIE, + TwitchVideosCollectionsIE)) + else super(TwitchVideosIE, cls).suitable(url)) + + @staticmethod + def _make_variables(channel_name, broadcast_type, sort): + return { + 'channelOwnerLogin': channel_name, + 'broadcastType': broadcast_type, + 'videoSort': sort.upper(), + } + + @staticmethod + def _extract_entry(node): + return _make_video_result(node) + + def _real_extract(self, url): + channel_name = self._match_id(url) + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + filter = qs.get('filter', ['all'])[0] + sort = qs.get('sort', ['time'])[0] + broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST) + return self.playlist_result( + self._entries(channel_name, broadcast.type, sort), + playlist_id=channel_name, + playlist_title='%s - %s sorted by %s' + % (channel_name, broadcast.label, + self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY))) + + +class TwitchVideosClipsIE(TwitchPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', + # Clips + 'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all', 'info_dict': { - 'id': 'spamfish', - 'title': 'Spamfish', + 'id': 'vanillatv', + 'title': 'vanillatv - Clips Top All', }, - 'playlist_mincount': 0, + 'playlist_mincount': 1, }, { - 'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts', + 'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d', 'only_matching': True, }] + Clip = collections.namedtuple('Clip', ['filter', 'label']) -class TwitchHighlightsIE(TwitchVideosBaseIE): - IE_NAME = 'twitch:videos:highlights' - _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE - _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' - _PLAYLIST_TYPE = 'highlights' + _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D') + _RANGE = { + '24hr': Clip('LAST_DAY', 'Top 24H'), + '7d': _DEFAULT_CLIP, + '30d': Clip('LAST_MONTH', 'Top 30D'), + 'all': Clip('ALL_TIME', 'Top All'), + } + + # NB: values other than 20 result in skipped videos + _PAGE_LIMIT = 20 + + _SHA256_HASH = 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777' + _OPERATION_NAME = 'ClipsCards__User' + _ENTRY_KIND = 'clip' + _EDGE_KIND = 'ClipEdge' + _NODE_KIND = 'Clip' + + @staticmethod + def _make_variables(channel_name, filter): + return { + 'login': channel_name, + 'criteria': { + 'filter': filter, + }, + } + + @staticmethod + def _extract_entry(node): + assert isinstance(node, dict) + clip_url = url_or_none(node.get('url')) + if not clip_url: + return + return { + '_type': 'url_transparent', + 'ie_key': TwitchClipsIE.ie_key(), + 'id': node.get('id'), + 'url': clip_url, + 'title': node.get('title'), + 'thumbnail': node.get('thumbnailURL'), + 'duration': float_or_none(node.get('durationSeconds')), + 'timestamp': unified_timestamp(node.get('createdAt')), + 'view_count': int_or_none(node.get('viewCount')), + 'language': node.get('language'), + } + + def _real_extract(self, url): + channel_name = self._match_id(url) + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + range = qs.get('range', ['7d'])[0] + clip = self._RANGE.get(range, self._DEFAULT_CLIP) + return self.playlist_result( + self._entries(channel_name, clip.filter), + playlist_id=channel_name, + playlist_title='%s - Clips %s' % (channel_name, clip.label)) + + +class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P[^/]+)/videos/*?\?.*?\bfilter=collections' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/highlights', + # Collections + 'url': 'https://www.twitch.tv/spamfish/videos?filter=collections', 'info_dict': { 'id': 'spamfish', - 'title': 'Spamfish', + 'title': 'spamfish - Collections', }, - 'playlist_mincount': 805, - }, { - 'url': 'https://m.twitch.tv/spamfish/videos/highlights', - 'only_matching': True, + 'playlist_mincount': 3, }] + _SHA256_HASH = '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84' + _OPERATION_NAME = 'ChannelCollectionsContent' + _ENTRY_KIND = 'collection' + _EDGE_KIND = 'CollectionsItemEdge' + _NODE_KIND = 'Collection' + + @staticmethod + def _make_variables(channel_name): + return { + 'ownerLogin': channel_name, + } + + @staticmethod + def _extract_entry(node): + assert isinstance(node, dict) + collection_id = node.get('id') + if not collection_id: + return + return { + '_type': 'url_transparent', + 'ie_key': TwitchCollectionIE.ie_key(), + 'id': collection_id, + 'url': 'https://www.twitch.tv/collections/%s' % collection_id, + 'title': node.get('title'), + 'thumbnail': node.get('thumbnailURL'), + 'duration': float_or_none(node.get('lengthSeconds')), + 'timestamp': unified_timestamp(node.get('updatedAt')), + 'view_count': int_or_none(node.get('viewCount')), + } + + def _real_extract(self, url): + channel_name = self._match_id(url) + return self.playlist_result( + self._entries(channel_name), playlist_id=channel_name, + playlist_title='%s - Collections' % channel_name) + class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' @@ -583,27 +765,21 @@ class TwitchStreamIE(TwitchBaseIE): def suitable(cls, url): return (False if any(ie.suitable(url) for ie in ( - TwitchVideoIE, - TwitchChapterIE, TwitchVodIE, - TwitchProfileIE, - TwitchAllVideosIE, - TwitchUploadsIE, - TwitchPastBroadcastsIE, - TwitchHighlightsIE, + TwitchCollectionIE, + TwitchVideosIE, + TwitchVideosClipsIE, + TwitchVideosCollectionsIE, TwitchClipsIE)) else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): channel_name = self._match_id(url) - access_token = self._call_api( - 'api/channels/%s/access_token' % channel_name, channel_name, - 'Downloading access token JSON') + access_token = self._download_access_token(channel_name) token = access_token['token'] - channel_id = compat_str(self._parse_json( - token, channel_name)['channel_id']) + channel_id = self._extract_channel_id(token, channel_name) stream = self._call_api( 'kraken/streams/%s?stream_type=all' % channel_id, From 5ed05f26adea517aa715d5ec4b0dccfea30b2b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 10:45:57 +0700 Subject: [PATCH 09/41] [svtplay] Fix svt id extraction (closes #26425, closes #26428, closes #26438) --- youtube_dl/extractor/svt.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index e12389cad..8e9ec2ca3 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -224,9 +224,15 @@ class SVTPlayIE(SVTPlayBaseIE): self._adjust_title(info_dict) return info_dict - svt_id = self._search_regex( - r']+data-video-id=["\']([\da-zA-Z-]+)', - webpage, 'video id') + svt_id = try_get( + data, lambda x: x['statistics']['dataLake']['content']['id'], + compat_str) + + if not svt_id: + svt_id = self._search_regex( + (r']+data-video-id=["\']([\da-zA-Z-]+)', + r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"'), + webpage, 'video id') return self._extract_by_video_id(svt_id, webpage) From 62ae19ff760b9df6a2600430e42e2c32f1449e7a Mon Sep 17 00:00:00 2001 From: TheRealDude2 Date: Sun, 6 Sep 2020 06:10:27 +0200 Subject: [PATCH 10/41] [xhamster] Improve initials regex (#26526) (closes #26353) --- youtube_dl/extractor/xhamster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 902a3ed33..76aeaf9a4 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -138,7 +138,8 @@ class XHamsterIE(InfoExtractor): initials = self._parse_json( self._search_regex( - r'window\.initials\s*=\s*({.+?})\s*;', webpage, 'initials', + (r'window\.initials\s*=\s*({.+?})\s*;\s*', + r'window\.initials\s*=\s*({.+?})\s*;'), webpage, 'initials', default='{}'), video_id, fatal=False) if initials: From 1d9bf655e68c873dc1f04739640046c43a1443d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 11:19:53 +0700 Subject: [PATCH 11/41] [utils] Recognize wav mimetype (closes #26463) --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 1 + 2 files changed, 3 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 0896f4150..962fd8d75 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -803,6 +803,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(mimetype2ext('text/vtt'), 'vtt') self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + self.assertEqual(mimetype2ext('audio/x-wav'), 'wav') + self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav') def test_month_by_name(self): self.assertEqual(month_by_name(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d1eca3760..01d9c0362 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4198,6 +4198,7 @@ def mimetype2ext(mt): 'vnd.ms-sstr+xml': 'ism', 'quicktime': 'mov', 'mp2t': 'ts', + 'x-wav': 'wav', }.get(res, res) From 67171ed7e940d9022e3388ddd029978bc426ea1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 11:31:28 +0700 Subject: [PATCH 12/41] [youtube:user] Extend _VALID_URL (closes #26443) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b35bf03aa..6611caf06 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -3008,7 +3008,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?Puser|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?Puser|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_%-]+)' _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos' IE_NAME = 'youtube:user' @@ -3038,6 +3038,9 @@ class YoutubeUserIE(YoutubeChannelIE): }, { 'url': 'https://www.youtube.com/c/gametrailers', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak', + 'only_matching': True, }, { 'url': 'https://www.youtube.com/gametrailers', 'only_matching': True, From 16ee69c1b7d9877c852d50428d8f047ada45d539 Mon Sep 17 00:00:00 2001 From: random-nick Date: Sun, 6 Sep 2020 04:44:53 +0000 Subject: [PATCH 13/41] [youtube] Fix age gate content detection (#26100) (closes #26152, closes #26311, closes #26384) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6611caf06..6ae2e58c1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1825,7 +1825,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Get video info video_info = {} embed_webpage = None - if re.search(r'player-age-gate-content">', video_webpage) is not None: + if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+' + or re.search(r'player-age-gate-content">', video_webpage) is not None): age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube From 50e9fcc1fd5aa8e3c8bbeb33f2434fbf9c1200d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 12:42:57 +0700 Subject: [PATCH 14/41] [nrktv:episode] Improve video id extraction (closes #25594, closes #26369, closes #26409) --- youtube_dl/extractor/nrk.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 94115534b..84aacbcda 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -11,7 +11,6 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, - JSON_LD_RE, js_to_json, NO_DEFAULT, parse_age_limit, @@ -425,13 +424,20 @@ class NRKTVEpisodeIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - nrk_id = self._parse_json( - self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'), - display_id)['@id'] - + info = self._search_json_ld(webpage, display_id, default={}) + nrk_id = info.get('@id') or self._html_search_meta( + 'nrk:program-id', webpage, default=None) or self._search_regex( + r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage, + 'nrk id') assert re.match(NRKTVIE._EPISODE_RE, nrk_id) - return self.url_result( - 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id) + + info.update({ + '_type': 'url_transparent', + 'id': nrk_id, + 'url': 'nrk:%s' % nrk_id, + 'ie_key': NRKIE.ie_key(), + }) + return info class NRKTVSerieBaseIE(InfoExtractor): From 6cd452acffe8d79c895a2ebd0346e2ba7f9e112f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 12:57:56 +0700 Subject: [PATCH 15/41] [ChangeLog] Actualize [ci skip] --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index bf515f784..8d410f600 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core ++ [utils] Recognize wav mimetype (#26463) + +Extractors +* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409) +* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384) +* [youtube:user] Extend URL regular expression (#26443) +* [xhamster] Improve initials regular expression (#26526, #26353) +* [svtplay] Fix video id extraction (#26425, #26428, #26438) +* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979, + #24263, #25010, #25553, #25606) + * Switch to GraphQL + + Add support for collections + + Add support for clips and collections playlists +* [biqle] Improve video ext extraction +* [xhamster] Fix extraction (#26157, #26254) +* [xhamster] Extend URL regular expression (#25789, #25804, #25927)) + + version 2020.07.28 Extractors From d51e23d9fc709a80ca037199a46e59c729a1192e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Sep 2020 13:00:41 +0700 Subject: [PATCH 16/41] release 2020.09.06 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 11 ++++------- youtube_dl/version.py | 2 +- 8 files changed, 18 insertions(+), 21 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f2260db46..f05aa66e6 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.07.28** +- [ ] I've verified that I'm running youtube-dl version **2020.09.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.07.28 + [debug] youtube-dl version 2020.09.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 8bc05c4ba..29beaf437 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.07.28** +- [ ] I've verified that I'm running youtube-dl version **2020.09.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 98348e0cd..f96b8d2bb 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.07.28** +- [ ] I've verified that I'm running youtube-dl version **2020.09.06** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 86706f528..3a175aa4d 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.07.28** +- [ ] I've verified that I'm running youtube-dl version **2020.09.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.07.28 + [debug] youtube-dl version 2020.09.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 52c2709f9..4977079de 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.07.28** +- [ ] I've verified that I'm running youtube-dl version **2020.09.06** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 8d410f600..86b0e8ccb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.09.06 Core + [utils] Recognize wav mimetype (#26463) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 35c1050e5..5c4e1d58c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -950,16 +950,13 @@ - **TVPlayHome** - **Tweakers** - **TwitCasting** - - **twitch:chapter** - **twitch:clips** - - **twitch:profile** - **twitch:stream** - - **twitch:video** - - **twitch:videos:all** - - **twitch:videos:highlights** - - **twitch:videos:past-broadcasts** - - **twitch:videos:uploads** - **twitch:vod** + - **TwitchCollection** + - **TwitchVideos** + - **TwitchVideosClips** + - **TwitchVideosCollections** - **twitter** - **twitter:amplify** - **twitter:broadcast** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17101fa47..45b4d3291 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.07.28' +__version__ = '2020.09.06' From aa272535567cb372b93fcd4cd24e13bca050437f Mon Sep 17 00:00:00 2001 From: tfvlrue <35318734+tfvlrue@users.noreply.github.com> Date: Sat, 12 Sep 2020 05:35:11 -0400 Subject: [PATCH 17/41] [soundcloud] Reduce pagination limit to fix 502 Bad Gateway errors when listing a user's tracks. (#26557) Per the documentation here https://developers.soundcloud.com/blog/offset-pagination-deprecated the maximum limit is 200, so let's respect that (even if a higher value sometimes works). Co-authored-by: tfvlrue --- youtube_dl/extractor/soundcloud.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index d37c52543..a2fddf6d9 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -558,8 +558,10 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): def _extract_playlist(self, base_url, playlist_id, playlist_title): + # Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. + # https://developers.soundcloud.com/blog/offset-pagination-deprecated COMMON_QUERY = { - 'limit': 80000, + 'limit': 200, 'linked_partitioning': '1', } From 1f7675451c119b67edb486940e1275b096dd47d5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 12 Sep 2020 19:20:53 +0100 Subject: [PATCH 18/41] [redbulltv] Add support for new redbull.com TV URLs(closes #22037)(closes #22063) --- youtube_dl/extractor/extractors.py | 2 + youtube_dl/extractor/redbulltv.py | 110 +++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9564465a0..ae7079a6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -918,7 +918,9 @@ from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import ( RedBullTVIE, + RedBullEmbedIE, RedBullTVRrnContentIE, + RedBullIE, ) from .reddit import ( RedditIE, diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index dbe1aaded..06945bd0c 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -10,7 +12,7 @@ from ..utils import ( class RedBullTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?PAP-\w+)' + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?PAP-\w+)' _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', @@ -29,8 +31,8 @@ class RedBullTVIE(InfoExtractor): 'id': 'AP-1PMHKJFCW1W11', 'ext': 'mp4', 'title': 'Grime - Hashtags S2E4', - 'description': 'md5:b5f522b89b72e1e23216e5018810bb25', - 'duration': 904.6, + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + 'duration': 904, }, 'params': { 'skip_download': True, @@ -44,11 +46,15 @@ class RedBullTVIE(InfoExtractor): }, { 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', + 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - + def extract_info(self, video_id): session = self._download_json( 'https://api.redbull.tv/v3/session', video_id, note='Downloading access token', query={ @@ -105,24 +111,104 @@ class RedBullTVIE(InfoExtractor): 'subtitles': subtitles, } + def _real_extract(self, url): + video_id = self._match_id(url) + return self.extract_info(video_id) + + +class RedBullEmbedIE(RedBullTVIE): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' + _TESTS = [{ + # HLS manifest accessible only using assetId + 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', + 'only_matching': True, + }] + _VIDEO_ESSENSE_TMPL = '''... on %s { + videoEssence { + attributes + } + }''' + + def _real_extract(self, url): + rrn_id = self._match_id(url) + asset_id = self._download_json( + 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', + rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, + query={ + 'query': '''{ + resource(id: "%s", enforceGeoBlocking: false) { + %s + %s + } +}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), + })['data']['resource']['videoEssence']['attributes']['assetId'] + return self.extract_info(asset_id) + class RedBullTVRrnContentIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/tv/(?:video|live|film)/(?Prrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + region, lang, rrn_id = re.search(self._VALID_URL, url).groups() + rrn_id += ':%s-%s' % (lang, region.upper()) + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) - webpage = self._download_webpage(url, display_id) - video_url = self._og_search_url(webpage) +class RedBullIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P[a-z]{2,3})-(?P[a-z]{2})/(?P(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', + 'md5': 'db8271a7200d40053a1809ed0dd574ff', + 'info_dict': { + 'id': 'AA-1MT8DQWA91W14', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + }, + }, { + 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', + 'only_matching': True, + }] + + def _real_extract(self, url): + region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups() + if filter_type == 'episodes': + filter_type = 'episode-videos' + elif filter_type == 'live': + filter_type = 'live-videos' + + rrn_id = self._download_json( + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/%s-%s' % (lang, region.upper()), + display_id, query={ + 'filter[type]': filter_type, + 'filter[uriSlug]': display_id, + 'rb3Schema': 'v1:hero', + })['data']['id'] return self.url_result( - video_url, ie=RedBullTVIE.ie_key(), - video_id=RedBullTVIE._match_id(video_url)) + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) From b03eebdb6aab0f778293ed298543083b6ae76963 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 13 Sep 2020 11:26:11 +0100 Subject: [PATCH 19/41] [redbulltv] improve support for rebull.com TV localized URLS(#22063) --- youtube_dl/extractor/redbulltv.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index 06945bd0c..3aae79f5d 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -192,7 +192,14 @@ class RedBullIE(InfoExtractor): }, { 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', 'only_matching': True, + }, { + # only available on the int-en website so a fallback is need for the API + # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero + 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', + 'only_matching': True, }] + _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] + _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] def _real_extract(self, url): region, lang, filter_type, display_id = re.search(self._VALID_URL, url).groups() @@ -201,8 +208,16 @@ class RedBullIE(InfoExtractor): elif filter_type == 'live': filter_type = 'live-videos' + regions = [region.upper()] + if region != 'int': + if region in self._LAT_FALLBACK_MAP: + regions.append('LAT') + if lang in self._INT_FALLBACK_LIST: + regions.append('INT') + locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) + rrn_id = self._download_json( - 'https://www.redbull.com/v3/api/graphql/v1/v3/query/%s-%s' % (lang, region.upper()), + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, display_id, query={ 'filter[type]': filter_type, 'filter[uriSlug]': display_id, From 95c98100155589e224c76fddb3d01dae0bd233ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Sep 2020 18:59:37 +0700 Subject: [PATCH 20/41] [svtplay] Fix id extraction (closes #26576) --- youtube_dl/extractor/svt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 8e9ec2ca3..2f6887d86 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -231,7 +231,9 @@ class SVTPlayIE(SVTPlayBaseIE): if not svt_id: svt_id = self._search_regex( (r']+data-video-id=["\']([\da-zA-Z-]+)', - r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"'), + r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)', + r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"', + r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), webpage, 'video id') return self._extract_by_video_id(svt_id, webpage) From da2069fb22fd3b34046fd1be03690fccdd9ab1a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Sep 2020 20:43:50 +0700 Subject: [PATCH 21/41] [googledrive] Use redirect URLs for source format (closes #18877, closes #23919, closes #24689, closes #26565) --- youtube_dl/extractor/googledrive.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 589e4d5c3..f2cc57e44 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -220,19 +220,27 @@ class GoogleDriveIE(InfoExtractor): 'id': video_id, 'export': 'download', }) - urlh = self._request_webpage( - source_url, video_id, note='Requesting source file', - errnote='Unable to request source file', fatal=False) + + def request_source_file(source_url, kind): + return self._request_webpage( + source_url, video_id, note='Requesting %s file' % kind, + errnote='Unable to request %s file' % kind, fatal=False) + urlh = request_source_file(source_url, 'source') if urlh: - def add_source_format(src_url): + def add_source_format(urlh): formats.append({ - 'url': src_url, + # Use redirect URLs as download URLs in order to calculate + # correct cookies in _calc_cookies. + # Using original URLs may result in redirect loop due to + # google.com's cookies mistakenly used for googleusercontent.com + # redirect URLs (see #23919). + 'url': urlh.geturl(), 'ext': determine_ext(title, 'mp4').lower(), 'format_id': 'source', 'quality': 1, }) if urlh.headers.get('Content-Disposition'): - add_source_format(source_url) + add_source_format(urlh) else: confirmation_webpage = self._webpage_read_content( urlh, url, video_id, note='Downloading confirmation page', @@ -242,9 +250,12 @@ class GoogleDriveIE(InfoExtractor): r'confirm=([^&"\']+)', confirmation_webpage, 'confirmation code', fatal=False) if confirm: - add_source_format(update_url_query(source_url, { + confirmed_source_url = update_url_query(source_url, { 'confirm': confirm, - })) + }) + urlh = request_source_file(confirmed_source_url, 'confirmed source') + if urlh and urlh.headers.get('Content-Disposition'): + add_source_format(urlh) if not formats: reason = self._search_regex( From 06cd4cdb252a45da25ed75ea63b714bc3b9d691b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Sep 2020 21:07:25 +0700 Subject: [PATCH 22/41] [srgssr] Extend _VALID_URL (closes #26555, closes #26556, closes #26578) --- youtube_dl/extractor/srgssr.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 170dce87f..f63a1359a 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -114,7 +114,7 @@ class SRGSSRPlayIE(InfoExtractor): [^/]+/(?Pvideo|audio)/[^?]+| popup(?Pvideo|audio)player ) - \?id=(?P[0-9a-f\-]{36}|\d+) + \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P[0-9a-f\-]{36}|\d+) ''' _TESTS = [{ @@ -175,6 +175,12 @@ class SRGSSRPlayIE(InfoExtractor): }, { 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', 'only_matching': True, + }, { + 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'only_matching': True, + }, { + 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260', + 'only_matching': True, }] def _real_extract(self, url): From ea74e00b3afe604531419c17eb7291038115a271 Mon Sep 17 00:00:00 2001 From: Daniel Peukert Date: Sun, 13 Sep 2020 16:23:21 +0200 Subject: [PATCH 23/41] [youtube] Fix empty description extraction (#26575) (closes #26006) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6ae2e58c1..02f3ab61a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1264,7 +1264,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, - } + }, + { + # empty description results in an empty string + 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', + 'info_dict': { + 'id': 'x41yOUIvK2k', + 'ext': 'mp4', + 'title': 'IMG 3456', + 'description': '', + 'upload_date': '20170613', + 'uploader_id': 'ElevageOrVert', + 'uploader': 'ElevageOrVert', + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1931,7 +1947,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ''', replace_url, video_description) video_description = clean_html(video_description) else: - video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage) + video_description = video_details.get('shortDescription') + if video_description is None: + video_description = self._html_search_meta('description', video_webpage) if not smuggled_data.get('force_singlefeed', False): if not self._downloader.params.get('noplaylist'): From 97f34a48d72c5e55a584b7defc0dd00d2d9b416c Mon Sep 17 00:00:00 2001 From: Derek Land Date: Sun, 13 Sep 2020 16:38:16 +0200 Subject: [PATCH 24/41] [rtlnl] Extend _VALID_URL (#26549) (closes #25821) --- youtube_dl/extractor/rtlnl.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index fadca8c17..cf4dc85db 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -14,12 +14,26 @@ class RtlNlIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:(?:www|static)\.)? (?: - rtlxl\.nl/[^\#]*\#!/[^/]+/| + rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/) ) (?P[0-9a-f-]+)''' _TESTS = [{ + # new URL schema + 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f', + 'md5': '490428f1187b60d714f34e1f2e3af0b6', + 'info_dict': { + 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1593293400, + 'upload_date': '20200627', + 'duration': 661.08, + }, + }, { + # old URL schema 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', 'md5': '473d1946c1fdd050b2c0161a4b13c373', 'info_dict': { @@ -31,6 +45,7 @@ class RtlNlIE(InfoExtractor): 'upload_date': '20160429', 'duration': 1167.96, }, + 'skip': '404', }, { # best format available a3t 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', From 45f6362464d283fe61653c75fbb93a6a87ac6f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Sep 2020 21:42:06 +0700 Subject: [PATCH 25/41] [rtlnl] Extend _VALID_URL for new embed URL schema --- youtube_dl/extractor/rtlnl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index cf4dc85db..9eaa06f25 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -15,7 +15,8 @@ class RtlNlIE(InfoExtractor): https?://(?:(?:www|static)\.)? (?: rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| - rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/) + rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)| + embed\.rtl\.nl/\#uuid= ) (?P[0-9a-f-]+)''' @@ -91,6 +92,10 @@ class RtlNlIE(InfoExtractor): }, { 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', 'only_matching': True, + }, { + # new embed URL schema + 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'only_matching': True, }] def _real_extract(self, url): From a31a022efd1b7bac8cd0a477f08534f3fcc67298 Mon Sep 17 00:00:00 2001 From: Alex Merkel Date: Thu, 18 Jun 2020 22:36:44 +0200 Subject: [PATCH 26/41] [postprocessor/embedthumbnail] Add support for non jpeg/png thumbnails (closes #25687) --- youtube_dl/postprocessor/embedthumbnail.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 56be914b8..e2002ab0b 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -41,6 +41,28 @@ class EmbedThumbnailPP(FFmpegPostProcessor): 'Skipping embedding the thumbnail because the file is missing.') return [], info + # Check for mislabeled webp file + with open(encodeFilename(thumbnail_filename), "rb") as f: + b = f.read(16) + if b'\x57\x45\x42\x50' in b: # Binary for WEBP + [thumbnail_filename_path, thumbnail_filename_extension] = os.path.splitext(thumbnail_filename) + if not thumbnail_filename_extension == ".webp": + webp_thumbnail_filename = thumbnail_filename_path + ".webp" + os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_thumbnail_filename)) + thumbnail_filename = webp_thumbnail_filename + + # If not a jpg or png thumbnail, convert it to jpg using ffmpeg + if not os.path.splitext(thumbnail_filename)[1].lower() in ['.jpg', '.png']: + jpg_thumbnail_filename = os.path.splitext(thumbnail_filename)[0] + ".jpg" + jpg_thumbnail_filename = os.path.join(os.path.dirname(jpg_thumbnail_filename), os.path.basename(jpg_thumbnail_filename).replace('%', '_')) # ffmpeg interprets % as image sequence + + self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % thumbnail_filename) + + self.run_ffmpeg(thumbnail_filename, jpg_thumbnail_filename, ['-bsf:v', 'mjpeg2jpeg']) + + os.remove(encodeFilename(thumbnail_filename)) + thumbnail_filename = jpg_thumbnail_filename + if info['ext'] == 'mp3': options = [ '-c', 'copy', '-map', '0', '-map', '1', From bff857a8af696e701482208617bf0b7564951326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Sep 2020 03:28:31 +0700 Subject: [PATCH 27/41] [postprocessor/embedthumbnail] Fix issues (closes #25717) * Fix WebP with wrong extension processing * Fix embedding of thumbnails with % character in path --- youtube_dl/postprocessor/embedthumbnail.py | 49 +++++++++++++--------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index e2002ab0b..5a3359588 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -13,6 +13,7 @@ from ..utils import ( encodeFilename, PostProcessingError, prepend_extension, + replace_extension, shell_quote ) @@ -41,27 +42,37 @@ class EmbedThumbnailPP(FFmpegPostProcessor): 'Skipping embedding the thumbnail because the file is missing.') return [], info - # Check for mislabeled webp file - with open(encodeFilename(thumbnail_filename), "rb") as f: - b = f.read(16) - if b'\x57\x45\x42\x50' in b: # Binary for WEBP - [thumbnail_filename_path, thumbnail_filename_extension] = os.path.splitext(thumbnail_filename) - if not thumbnail_filename_extension == ".webp": - webp_thumbnail_filename = thumbnail_filename_path + ".webp" - os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_thumbnail_filename)) - thumbnail_filename = webp_thumbnail_filename + def is_webp(path): + with open(encodeFilename(path), 'rb') as f: + b = f.read(12) + return b[0:4] == b'RIFF' and b[8:] == b'WEBP' - # If not a jpg or png thumbnail, convert it to jpg using ffmpeg - if not os.path.splitext(thumbnail_filename)[1].lower() in ['.jpg', '.png']: - jpg_thumbnail_filename = os.path.splitext(thumbnail_filename)[0] + ".jpg" - jpg_thumbnail_filename = os.path.join(os.path.dirname(jpg_thumbnail_filename), os.path.basename(jpg_thumbnail_filename).replace('%', '_')) # ffmpeg interprets % as image sequence + # Correct extension for WebP file with wrong extension (see #25687, #25717) + _, thumbnail_ext = os.path.splitext(thumbnail_filename) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext != 'webp' and is_webp(thumbnail_filename): + self._downloader.to_screen( + '[ffmpeg] Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename) + thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp') + os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename)) + thumbnail_filename = thumbnail_webp_filename + thumbnail_ext = 'webp' - self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % thumbnail_filename) - - self.run_ffmpeg(thumbnail_filename, jpg_thumbnail_filename, ['-bsf:v', 'mjpeg2jpeg']) - - os.remove(encodeFilename(thumbnail_filename)) - thumbnail_filename = jpg_thumbnail_filename + # Convert unsupported thumbnail formats to JPEG (see #25687, #25717) + if thumbnail_ext not in ['jpg', 'png']: + # NB: % is supposed to be escaped with %% but this does not work + # for input files so working around with standard substitution + escaped_thumbnail_filename = thumbnail_filename.replace('%', '#') + os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename)) + escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg') + self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename) + self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg']) + os.remove(encodeFilename(escaped_thumbnail_filename)) + thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg') + # Rename back to unescaped for further processing + os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename)) + thumbnail_filename = thumbnail_jpg_filename if info['ext'] == 'mp3': options = [ From ca7ebc4e5e03a79b90ffa509cd3ce6bbf65d43fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Sep 2020 03:35:18 +0700 Subject: [PATCH 28/41] [ChangeLog] Actualize [ci skip] --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 86b0e8ccb..041cf7113 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version + +Core ++ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails + (#25687, #25717) + +Extractors +* [rtlnl] Extend URL regular expression (#26549, #25821) +* [youtube] Fix empty description extraction (#26575, #26006) +* [srgssr] Extend URL regular expression (#26555, #26556, #26578) +* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689, + #26565) +* [svtplay] Fix id extraction (#26576) +* [redbulltv] Improve support for rebull.com TV localized URLs (#22063) ++ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063) +* [soundcloud:pagedplaylist] Reduce pagination limit (#26557) + + version 2020.09.06 Core From e8c5d40bc840b9e3ec8b4e8070291a64a4fa75b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Sep 2020 03:37:36 +0700 Subject: [PATCH 29/41] release 2020.09.14 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 8 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f05aa66e6..352263789 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.06 + [debug] youtube-dl version 2020.09.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 29beaf437..fa6509be3 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index f96b8d2bb..70b0f2f19 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 3a175aa4d..ec17e4a33 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.06 + [debug] youtube-dl version 2020.09.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 4977079de..6ac963206 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.06** +- [ ] I've verified that I'm running youtube-dl version **2020.09.14** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 041cf7113..4143ec2fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.09.14 Core + [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5c4e1d58c..367545a96 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -717,6 +717,8 @@ - **RayWenderlichCourse** - **RBMARadio** - **RDS**: RDS.ca + - **RedBull** + - **RedBullEmbed** - **RedBullTV** - **RedBullTVRrnContent** - **Reddit** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45b4d3291..5625b8324 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.09.06' +__version__ = '2020.09.14' From 86b7c00adca578b36138b165b0add5978972917e Mon Sep 17 00:00:00 2001 From: Ori Avtalion Date: Thu, 17 Sep 2020 23:15:44 +0300 Subject: [PATCH 30/41] [downloader/http] Retry download when urlopen times out (#26603) (refs #10935) --- youtube_dl/downloader/http.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5046878df..e14ddce58 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -106,7 +106,12 @@ class HttpFD(FileDownloader): set_range(request, range_start, range_end) # Establish connection try: - ctx.data = self.ydl.urlopen(request) + try: + ctx.data = self.ydl.urlopen(request) + except (compat_urllib_error.URLError, ) as err: + if isinstance(err.reason, socket.timeout): + raise RetryDownload(err) + raise err # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range From cdc55e666f3f9c795ed74c478c6a249d992cf93f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Sep 2020 03:32:54 +0700 Subject: [PATCH 31/41] [downloader/http] Improve timeout detection when reading block of data (refs #10935) --- youtube_dl/downloader/http.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index e14ddce58..6ef26548d 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -238,9 +238,11 @@ class HttpFD(FileDownloader): except socket.timeout as e: retry(e) except socket.error as e: - if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): - raise - retry(e) + # SSLError on python 2 (inherits socket.error) may have + # no errno set but this error message + if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out': + retry(e) + raise byte_counter += len(data_block) From f8c7bed133fe729a9e26f23e5685559e3fa12eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Sep 2020 03:41:16 +0700 Subject: [PATCH 32/41] [extractor/common] Handle ssl.CertificateError in _request_webpage (closes #26601) ssl.CertificateError is raised on some python versions <= 3.7.x --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a61753b17..f740ddad1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,6 +10,7 @@ import os import random import re import socket +import ssl import sys import time import math @@ -623,9 +624,12 @@ class InfoExtractor(object): url_or_request = update_url_query(url_or_request, query) if data is not None or headers: url_or_request = sanitized_Request(url_or_request, data, headers) + exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error] + if hasattr(ssl, 'CertificateError'): + exceptions.append(ssl.CertificateError) try: return self._downloader.urlopen(url_or_request) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + except tuple(exceptions) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): # Retain reference to error to prevent file object from From 6e65a2a67e075ae8f3e4fe03c732d7772d36f5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20P=C3=B6schel?= Date: Fri, 18 Sep 2020 00:26:56 +0200 Subject: [PATCH 33/41] [downloader/hls] Fix incorrect end byte in Range HTTP header for media segments with EXT-X-BYTERANGE (#24512) (closes #14748) The end of the byte range is the first byte that is NOT part of the to be downloaded range. So don't include it into the requested HTTP download range, as this additional byte leads to a broken TS packet and subsequently to e.g. visible video corruption. Fixes #14748. --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 84bc34928..0f2c06f40 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -141,7 +141,7 @@ class HlsFD(FragmentFD): count = 0 headers = info_dict.get('http_headers', {}) if byte_range: - headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) while count <= fragment_retries: try: success, frag_content = self._download_fragment( From 540b9f5164d50eb99d9c988ece6eb6775ccaf94a Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Fri, 18 Sep 2020 18:59:19 -0400 Subject: [PATCH 34/41] [pornhub] Fix view count extraction (#26621) (refs #26614) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3567a3283..c64c870dc 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -341,7 +341,7 @@ class PornHubIE(PornHubBaseIE): webpage, 'uploader', fatal=False) view_count = self._extract_count( - r'([\d,\.]+) views', webpage, 'view') + r'([\d,\.]+) [Vv]iews', webpage, 'view') like_count = self._extract_count( r'([\d,\.]+)', webpage, 'like') dislike_count = self._extract_count( From ad06b99dd47acc8b1bd213f079e1f36da9e3a73d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Sep 2020 06:13:42 +0700 Subject: [PATCH 35/41] [extractor/common] Extract author as uploader for VideoObject in _json_ld --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f740ddad1..c9b8b6337 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1268,6 +1268,7 @@ class InfoExtractor(object): 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), + 'uploader': str_or_none(e.get('author')), 'filesize': float_or_none(e.get('contentSize')), 'tbr': int_or_none(e.get('bitrate')), 'width': int_or_none(e.get('width')), From ce5b904050b4610bac6d99673bbe9181a3af3db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Sep 2020 06:33:17 +0700 Subject: [PATCH 36/41] [extractor/common] Relax interaction count extraction in _json_ld --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c9b8b6337..021945a89 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -68,6 +68,7 @@ from ..utils import ( sanitized_Request, sanitize_filename, str_or_none, + str_to_int, strip_or_none, unescapeHTML, unified_strdate, @@ -1248,7 +1249,10 @@ class InfoExtractor(object): interaction_type = is_e.get('interactionType') if not isinstance(interaction_type, compat_str): continue - interaction_count = int_or_none(is_e.get('userInteractionCount')) + # For interaction count some sites provide string instead of + # an integer (as per spec) with non digit characters (e.g. ",") + # so extracting count with more relaxed str_to_int + interaction_count = str_to_int(is_e.get('userInteractionCount')) if interaction_count is None: continue count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) From cd85a1bb8b24eaf6a421a32a985d4c3ad4f80597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Sep 2020 06:34:34 +0700 Subject: [PATCH 37/41] [pornhub] Extract metadata from JSON-LD (closes #26614) --- youtube_dl/extractor/pornhub.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index c64c870dc..529f3f711 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -17,6 +17,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + merge_dicts, NO_DEFAULT, orderedSet, remove_quotes, @@ -59,13 +60,14 @@ class PornHubIE(PornHubBaseIE): ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': '1e19b41231a02eba417839222ac9d58e', + 'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'info_dict': { 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', 'upload_date': '20130628', + 'timestamp': 1372447216, 'duration': 361, 'view_count': int, 'like_count': int, @@ -82,8 +84,8 @@ class PornHubIE(PornHubBaseIE): 'id': '1331683002', 'ext': 'mp4', 'title': '重庆婷婷女王足交', - 'uploader': 'Unknown', 'upload_date': '20150213', + 'timestamp': 1423804862, 'duration': 1753, 'view_count': int, 'like_count': int, @@ -121,6 +123,7 @@ class PornHubIE(PornHubBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'This video has been disabled', }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -338,7 +341,7 @@ class PornHubIE(PornHubBaseIE): video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', fatal=False) + webpage, 'uploader', default=None) view_count = self._extract_count( r'([\d,\.]+) [Vv]iews', webpage, 'view') @@ -356,7 +359,11 @@ class PornHubIE(PornHubBaseIE): if div: return re.findall(r']+\bhref=[^>]+>([^<]+)', div) - return { + info = self._search_json_ld(webpage, video_id, default={}) + # description provided in JSON-LD is irrelevant + info['description'] = None + + return merge_dicts({ 'id': video_id, 'uploader': video_uploader, 'upload_date': upload_date, @@ -372,7 +379,7 @@ class PornHubIE(PornHubBaseIE): 'tags': extract_list('tags'), 'categories': extract_list('categories'), 'subtitles': subtitles, - } + }, info) class PornHubPlaylistBaseIE(PornHubBaseIE): From b856b3997ce2978f7a8c386b4ce4840fd221c45a Mon Sep 17 00:00:00 2001 From: Patrick Dessalle Date: Wed, 28 Aug 2019 19:04:57 +0200 Subject: [PATCH 38/41] [telequebec] Add support for brightcove videos (closes #25833) --- youtube_dl/extractor/telequebec.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index c82c94b3a..3adea7bc5 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -12,6 +12,8 @@ from ..utils import ( class TeleQuebecBaseIE(InfoExtractor): + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + @staticmethod def _limelight_result(media_id): return { @@ -21,6 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor): 'ie_key': 'LimelightMedia', } + def _brightcove_result(self, brightcove_id): + return self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'BrightcoveNew', brightcove_id) + class TeleQuebecIE(TeleQuebecBaseIE): _VALID_URL = r'''(?x) @@ -37,7 +46,7 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'id': '577116881b4b439084e6b1cf4ef8b1b3', 'ext': 'mp4', 'title': 'Un petit choc et puis repart!', - 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', + 'description': 'md5:067bc84bd6afecad85e69d1000730907', }, 'params': { 'skip_download': True, @@ -58,7 +67,10 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, media_id)['media'] - info = self._limelight_result(media_data['streamInfo']['sourceId']) + if media_data['streamInfo']['source'] == 'Brightcove': + info = self._brightcove_result(media_data['streamInfo']['sourceId']) + elif media_data['streamInfo']['source'] == 'Limelight': + info = self._limelight_result(media_data['streamInfo']['sourceId']) info.update({ 'title': media_data.get('title'), 'description': try_get( From 82ef02e936a0e2ca698048c8cd79273a22e79867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Sep 2020 07:52:42 +0700 Subject: [PATCH 39/41] [telequebec] Fix issues (closes #26368) --- youtube_dl/extractor/telequebec.py | 55 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 3adea7bc5..b4c485b9b 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -12,23 +12,24 @@ from ..utils import ( class TeleQuebecBaseIE(InfoExtractor): - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + @staticmethod + def _result(url, ie_key): + return { + '_type': 'url_transparent', + 'url': smuggle_url(url, {'geo_countries': ['CA']}), + 'ie_key': ie_key, + } @staticmethod def _limelight_result(media_id): - return { - '_type': 'url_transparent', - 'url': smuggle_url( - 'limelight:media:' + media_id, {'geo_countries': ['CA']}), - 'ie_key': 'LimelightMedia', - } + return TeleQuebecBaseIE._result( + 'limelight:media:' + media_id, 'LimelightMedia') - def _brightcove_result(self, brightcove_id): - return self.url_result( - smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'geo_countries': ['CA']}), - 'BrightcoveNew', brightcove_id) + @staticmethod + def _brightcove_result(brightcove_id): + return TeleQuebecBaseIE._result( + 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' + % brightcove_id, 'BrightcoveNew') class TeleQuebecIE(TeleQuebecBaseIE): @@ -51,6 +52,22 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', + 'info_dict': { + 'id': '6167180337001', + 'ext': 'mp4', + 'title': 'Le soleil', + 'description': 'md5:64289c922a8de2abbe99c354daffde02', + 'uploader_id': '6150020952001', + 'upload_date': '20200625', + 'timestamp': 1593090307, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], }, { # no description 'url': 'http://zonevideo.telequebec.tv/media/30261', @@ -67,10 +84,14 @@ class TeleQuebecIE(TeleQuebecBaseIE): 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, media_id)['media'] - if media_data['streamInfo']['source'] == 'Brightcove': - info = self._brightcove_result(media_data['streamInfo']['sourceId']) - elif media_data['streamInfo']['source'] == 'Limelight': - info = self._limelight_result(media_data['streamInfo']['sourceId']) + source_id = media_data['streamInfo']['sourceId'] + source = (try_get( + media_data, lambda x: x['streamInfo']['source'], + compat_str) or 'limelight').lower() + if source == 'brightcove': + info = self._brightcove_result(source_id) + else: + info = self._limelight_result(source_id) info.update({ 'title': media_data.get('title'), 'description': try_get( From defc820b70e3f0131fad450fc1e673e18b00a625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2020 10:05:00 +0700 Subject: [PATCH 40/41] [twitch] Switch streams to GraphQL and refactor (closes #26535) --- youtube_dl/extractor/twitch.py | 142 +++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index eadc48c6d..ab6654432 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -24,7 +24,6 @@ from ..utils import ( parse_duration, parse_iso8601, qualities, - str_or_none, try_get, unified_timestamp, update_url_query, @@ -337,19 +336,27 @@ def _make_video_result(node): class TwitchGraphQLBaseIE(TwitchBaseIE): _PAGE_LIMIT = 100 - def _download_gql(self, video_id, op, variables, sha256_hash, note, fatal=True): + _OPERATION_HASHES = { + 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14', + 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb', + 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777', + 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', + 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', + 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', + 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', + } + + def _download_gql(self, video_id, ops, note, fatal=True): + for op in ops: + op['extensions'] = { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': self._OPERATION_HASHES[op['operationName']], + } + } return self._download_json( 'https://gql.twitch.tv/gql', video_id, note, - data=json.dumps({ - 'operationName': op, - 'variables': variables, - 'extensions': { - 'persistedQuery': { - 'version': 1, - 'sha256Hash': sha256_hash, - } - } - }).encode(), + data=json.dumps(ops).encode(), headers={ 'Content-Type': 'text/plain;charset=UTF-8', 'Client-ID': self._CLIENT_ID, @@ -369,14 +376,15 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE): }] _OPERATION_NAME = 'CollectionSideBar' - _SHA256_HASH = '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14' def _real_extract(self, url): collection_id = self._match_id(url) collection = self._download_gql( - collection_id, self._OPERATION_NAME, - {'collectionID': collection_id}, self._SHA256_HASH, - 'Downloading collection GraphQL')['data']['collection'] + collection_id, [{ + 'operationName': self._OPERATION_NAME, + 'variables': {'collectionID': collection_id}, + }], + 'Downloading collection GraphQL')[0]['data']['collection'] title = collection.get('title') entries = [] for edge in collection['items']['edges']: @@ -403,14 +411,16 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): if cursor: variables['cursor'] = cursor page = self._download_gql( - channel_name, self._OPERATION_NAME, variables, - self._SHA256_HASH, + channel_name, [{ + 'operationName': self._OPERATION_NAME, + 'variables': variables, + }], 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num), fatal=False) if not page: break edges = try_get( - page, lambda x: x['data']['user'][entries_key]['edges'], list) + page, lambda x: x[0]['data']['user'][entries_key]['edges'], list) if not edges: break for edge in edges: @@ -553,7 +563,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE): 'views': 'Popular', } - _SHA256_HASH = 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb' _OPERATION_NAME = 'FilterableVideoTower_Videos' _ENTRY_KIND = 'video' _EDGE_KIND = 'VideoEdge' @@ -622,7 +631,6 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE): # NB: values other than 20 result in skipped videos _PAGE_LIMIT = 20 - _SHA256_HASH = 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777' _OPERATION_NAME = 'ClipsCards__User' _ENTRY_KIND = 'clip' _EDGE_KIND = 'ClipEdge' @@ -680,7 +688,6 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): 'playlist_mincount': 3, }] - _SHA256_HASH = '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84' _OPERATION_NAME = 'ChannelCollectionsContent' _ENTRY_KIND = 'collection' _EDGE_KIND = 'CollectionsItemEdge' @@ -717,7 +724,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): playlist_title='%s - Collections' % channel_name) -class TwitchStreamIE(TwitchBaseIE): +class TwitchStreamIE(TwitchGraphQLBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'''(?x) https?:// @@ -774,28 +781,43 @@ class TwitchStreamIE(TwitchBaseIE): else super(TwitchStreamIE, cls).suitable(url)) def _real_extract(self, url): - channel_name = self._match_id(url) + channel_name = self._match_id(url).lower() - access_token = self._download_access_token(channel_name) + gql = self._download_gql( + channel_name, [{ + 'operationName': 'StreamMetadata', + 'variables': {'channelLogin': channel_name}, + }, { + 'operationName': 'ComscoreStreamingQuery', + 'variables': { + 'channel': channel_name, + 'clipSlug': '', + 'isClip': False, + 'isLive': True, + 'isVodOrCollection': False, + 'vodID': '', + }, + }, { + 'operationName': 'VideoPreviewOverlay', + 'variables': {'login': channel_name}, + }], + 'Downloading stream GraphQL') - token = access_token['token'] - channel_id = self._extract_channel_id(token, channel_name) + user = gql[0]['data']['user'] - stream = self._call_api( - 'kraken/streams/%s?stream_type=all' % channel_id, - channel_id, 'Downloading stream JSON').get('stream') + if not user: + raise ExtractorError( + '%s does not exist' % channel_name, expected=True) + + stream = user['stream'] if not stream: - raise ExtractorError('%s is offline' % channel_id, expected=True) + raise ExtractorError('%s is offline' % channel_name, expected=True) - # Channel name may be typed if different case than the original channel name - # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing - # an invalid m3u8 URL. Working around by use of original channel name from stream - # JSON and fallback to lowercase if it's not available. - channel_name = try_get( - stream, lambda x: x['channel']['name'], - compat_str) or channel_name.lower() + access_token = self._download_access_token(channel_name) + token = access_token['token'] + stream_id = stream.get('id') or channel_name query = { 'allow_source': 'true', 'allow_audio_only': 'true', @@ -808,41 +830,39 @@ class TwitchStreamIE(TwitchBaseIE): 'token': token.encode('utf-8'), } formats = self._extract_m3u8_formats( - '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)), - channel_id, 'mp4') + '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name), + stream_id, 'mp4', query=query) self._prefer_source(formats) view_count = stream.get('viewers') - timestamp = parse_iso8601(stream.get('created_at')) + timestamp = unified_timestamp(stream.get('createdAt')) - channel = stream['channel'] - title = self._live_title(channel.get('display_name') or channel.get('name')) - description = channel.get('status') + sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {} + uploader = sq_user.get('displayName') + description = try_get( + sq_user, lambda x: x['broadcastSettings']['title'], compat_str) - thumbnails = [] - for thumbnail_key, thumbnail_url in stream['preview'].items(): - m = re.search(r'(?P\d+)x(?P\d+)\.jpg$', thumbnail_key) - if not m: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) + thumbnail = url_or_none(try_get( + gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'], + compat_str)) + + title = uploader or channel_name + stream_type = stream.get('type') + if stream_type in ['rerun', 'live']: + title += ' (%s)' % stream_type return { - 'id': str_or_none(stream.get('_id')) or channel_id, + 'id': stream_id, 'display_id': channel_name, - 'title': title, + 'title': self._live_title(title), 'description': description, - 'thumbnails': thumbnails, - 'uploader': channel.get('display_name'), - 'uploader_id': channel.get('name'), + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': channel_name, 'timestamp': timestamp, 'view_count': view_count, 'formats': formats, - 'is_live': True, + 'is_live': stream_type == 'live', } From 1ca5f821c8708720d5897b48a8c8d9e3d8822f93 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sun, 20 Sep 2020 06:39:42 +0200 Subject: [PATCH 41/41] [redtube] Extend _VALID_URL (#26506) --- youtube_dl/extractor/redtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 2d2f6a98c..a1ca791ca 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -15,7 +15,7 @@ from ..utils import ( class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' + _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' _TESTS = [{ 'url': 'http://www.redtube.com/66418', 'md5': 'fc08071233725f26b8f014dba9590005', @@ -31,6 +31,9 @@ class RedTubeIE(InfoExtractor): }, { 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 'only_matching': True, + }, { + 'url': 'http://it.redtube.com/66418', + 'only_matching': True, }] @staticmethod