From 1980ff4550a3f040fbc1e054d6b91013e9d8cb96 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 26 May 2021 11:04:39 +0100 Subject: [PATCH 1/7] [vimeo] fix vimeo pro embed extraction(closes #29126) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 102687b82..0b386f450 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -647,7 +647,7 @@ class VimeoIE(VimeoBaseInfoExtractor): expected=True) raise - if '://player.vimeo.com/video/' in url: + if '//player.vimeo.com/video/' in url: config = self._parse_json(self._search_regex( r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) if config.get('view') == 4: From 24297a42efc52862cb9510d32b28efd7faf49af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 00:36:26 +0700 Subject: [PATCH 2/7] [youtube] Fix get_video_info request (closes #29086, closes #29165) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0c52e5a8b..bf858c39d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1499,6 +1499,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'unable to download video info webpage', query={ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'html5': 1, }, fatal=False)), lambda x: x['player_response'][0], compat_str) or '{}', video_id) From e13a01061d149f4fac7db1a50124c4745a11c16e Mon Sep 17 00:00:00 2001 From: phlip Date: Fri, 28 May 2021 11:01:59 +1000 Subject: [PATCH 3/7] [twitch:clips] Add access token query to download URLs (closes #29136) --- youtube_dl/extractor/twitch.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a7867f4d3..7f9738d43 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -49,6 +49,7 @@ class TwitchBaseIE(InfoExtractor): 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', + 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', } @@ -924,6 +925,17 @@ class TwitchClipsIE(TwitchBaseIE): raise ExtractorError( 'This clip is no longer available', expected=True) + access_token = self._download_gql( + video_id, [{ + 'operationName': 'VideoAccessToken_Clip', + 'variables': { + 'slug': video_id, + }, + }], + 'Downloading access token GraphQL') + access_token = try_get( + access_token, lambda x: x[0]['data']['clip']['playbackAccessToken']) + formats = [] for option in clip.get('videoQualities', []): if not isinstance(option, dict): @@ -931,6 +943,14 @@ class TwitchClipsIE(TwitchBaseIE): source = url_or_none(option.get('sourceURL')) if not source: continue + if access_token: + source = "%s%s%s" % ( + source, + "&" if "?" in source else "?", + compat_urllib_parse_urlencode({ + "sig": access_token.get('signature'), + "token": access_token.get('value'), + })) formats.append({ 'url': source, 'format_id': option.get('quality'), From f3cd1d9cec91943a459a0662cbcffe3b2e1f6675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 01:46:49 +0700 Subject: [PATCH 4/7] [twitch:clips] Improve extraction (closes #29149) --- youtube_dl/extractor/twitch.py | 48 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 7f9738d43..a378bd6dc 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -894,7 +894,25 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - clip = self._download_base_gql( + clip = self._download_gql( + video_id, [{ + 'operationName': 'VideoAccessToken_Clip', + 'variables': { + 'slug': video_id, + }, + }], + 'Downloading clip access token GraphQL')[0]['data']['clip'] + + if not clip: + raise ExtractorError( + 'This clip is no longer available', expected=True) + + access_query = { + 'sig': clip['playbackAccessToken']['signature'], + 'token': clip['playbackAccessToken']['value'], + } + + data = self._download_base_gql( video_id, { 'query': '''{ clip(slug: "%s") { @@ -919,22 +937,10 @@ class TwitchClipsIE(TwitchBaseIE): } viewCount } -}''' % video_id}, 'Downloading clip GraphQL')['data']['clip'] +}''' % video_id}, 'Downloading clip GraphQL', fatal=False) - if not clip: - raise ExtractorError( - 'This clip is no longer available', expected=True) - - access_token = self._download_gql( - video_id, [{ - 'operationName': 'VideoAccessToken_Clip', - 'variables': { - 'slug': video_id, - }, - }], - 'Downloading access token GraphQL') - access_token = try_get( - access_token, lambda x: x[0]['data']['clip']['playbackAccessToken']) + if data: + clip = try_get(data, lambda x: x['data']['clip'], dict) or clip formats = [] for option in clip.get('videoQualities', []): @@ -943,16 +949,8 @@ class TwitchClipsIE(TwitchBaseIE): source = url_or_none(option.get('sourceURL')) if not source: continue - if access_token: - source = "%s%s%s" % ( - source, - "&" if "?" in source else "?", - compat_urllib_parse_urlencode({ - "sig": access_token.get('signature'), - "token": access_token.get('value'), - })) formats.append({ - 'url': source, + 'url': update_url_query(source, access_query), 'format_id': option.get('quality'), 'height': int_or_none(option.get('quality')), 'fps': int_or_none(option.get('frameRate')), From 6511b8e8d7db78d4ba3706df5122a74e1c9b9b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 03:05:22 +0700 Subject: [PATCH 5/7] [ted] Prefer own formats over external sources (closes #29142) --- youtube_dl/extractor/ted.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 63e2455b2..f09f1a3f9 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -123,6 +123,10 @@ class TEDIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # with own formats and private Youtube external + 'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity', + 'only_matching': True, }] _NATIVE_FORMATS = { @@ -210,16 +214,6 @@ class TEDIE(InfoExtractor): player_talk = talk_info['player_talks'][0] - external = player_talk.get('external') - if isinstance(external, dict): - service = external.get('service') - if isinstance(service, compat_str): - ext_url = None - if service.lower() == 'youtube': - ext_url = external.get('code') - - return self.url_result(ext_url or external['uri']) - resources_ = player_talk.get('resources') or talk_info.get('resources') http_url = None @@ -294,6 +288,16 @@ class TEDIE(InfoExtractor): 'vcodec': 'none', }) + if not formats: + external = player_talk.get('external') + if isinstance(external, dict): + service = external.get('service') + if isinstance(service, compat_str): + ext_url = None + if service.lower() == 'youtube': + ext_url = external.get('code') + return self.url_result(ext_url or external['uri']) + self._sort_formats(formats) video_id = compat_str(talk_info['id']) From 2ee6c7f11074917c08253af4c47f9258aa1e0dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 03:43:59 +0700 Subject: [PATCH 6/7] [ustream] Detect https embeds (closes #29133) --- youtube_dl/extractor/ustream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 9e860aeb7..1e29cbe22 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -75,7 +75,7 @@ class UstreamIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r']+?src=(["\'])(?Phttp://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) + r']+?src=(["\'])(?Phttps?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) if mobj is not None: return mobj.group('url') From d495292852b6c2f1bd58bc2141ff2b0265c952cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 30 May 2021 06:14:59 +0700 Subject: [PATCH 7/7] [ard] Relax _VALID_URL and fix video ids (closes #22724, closes #29091) --- youtube_dl/extractor/ard.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index d57c5ba0f..d45a9fe52 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE): class ARDIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P[^/?#]+)-(?:video-?)?(?P[0-9]+))\.html' + _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html' _TESTS = [{ # available till 7.01.2022 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', 'info_dict': { - 'display_id': 'maischberger-die-woche', - 'id': '100', + 'id': 'maischberger-die-woche-video100', + 'display_id': 'maischberger-die-woche-video100', 'ext': 'mp4', 'duration': 3687.0, 'title': 'maischberger. die woche vom 7. Januar 2021', @@ -264,16 +264,25 @@ class ARDIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, { - 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html', + 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html', + 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html', 'only_matching': True, }, { 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html', + 'only_matching': True, + }, { + 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('id') player_url = mobj.group('mainurl') + '~playerXml.xml' doc = self._download_xml(player_url, display_id) @@ -324,7 +333,7 @@ class ARDIE(InfoExtractor): self._sort_formats(formats) return { - 'id': mobj.group('id'), + 'id': xpath_text(video_node, './videoId', default=display_id), 'formats': formats, 'display_id': display_id, 'title': video_node.find('./title').text,