From f0c3a223cd446525b5321df7a6e0800ee1f88721 Mon Sep 17 00:00:00 2001 From: Grabien Date: Sun, 8 Aug 2021 23:20:54 +0300 Subject: [PATCH 1/3] - rumble plugin direct links support --- youtube_dl/extractor/rumble.py | 75 +++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rumble.py b/youtube_dl/extractor/rumble.py index 4a0225109..1aac82abb 100644 --- a/youtube_dl/extractor/rumble.py +++ b/youtube_dl/extractor/rumble.py @@ -10,9 +10,11 @@ from ..utils import ( try_get, ) +import re + class RumbleEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?:embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)|(.*?)\.html)' _TESTS = [{ 'url': 'https://rumble.com/embed/v5pv5f', 'md5': '36a18a049856720189f30977ccbb2c34', @@ -26,10 +28,79 @@ class RumbleEmbedIE(InfoExtractor): }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, + }, { + 'url': 'https://rumble.com/vhlrar-mike-lindell-to-confront-brian-kemp-and-doug-ducey-over-election-fraud.html', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + if re.match(r'https?://(?:www\.)?rumble\.com/(.*?)\.html', url): + video_id = "" + content, urlh = self._download_webpage_handle(url, video_id) + video_id = re.findall(r'"embedUrl":"https://rumble\.com/embed/(.*?)/"', content)[0] + else: + video_id = self._match_id(url) + video = self._download_json( + 'https://rumble.com/embedJS/', video_id, + query={'request': 'video', 'v': video_id}) + title = video['title'] + + formats = [] + for height, ua in (video.get('ua') or {}).items(): + for i in range(2): + f_url = try_get(ua, lambda x: x[i], compat_str) + if f_url: + ext = determine_ext(f_url) + f = { + 'ext': ext, + 'format_id': '%s-%sp' % (ext, height), + 'height': int_or_none(height), + 'url': f_url, + } + bitrate = try_get(ua, lambda x: x[i + 2]['bitrate']) + if 
bitrate: + f['tbr'] = int_or_none(bitrate) + formats.append(f) + self._sort_formats(formats) + + author = video.get('author') or {} + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': video.get('i'), + 'timestamp': parse_iso8601(video.get('pubDate')), + 'channel': author.get('name'), + 'channel_url': author.get('url'), + 'duration': int_or_none(video.get('duration')), + } + + +class RumbleRegularIE(RumbleEmbedIE): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(.*?)\.html' + _TESTS = [{ + 'url': 'https://rumble.com/vhlrar-mike-lindell-to-confront-brian-kemp-and-doug-ducey-over-election-fraud.html', + 'md5': '36a18a049856720189f30977ccbb2c34', + 'info_dict': { + 'id': 'v5pv5f', + 'ext': 'mp4', + 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', + 'timestamp': 1571611968, + 'upload_date': '20191020', + } + }, { + 'url': 'https://rumble.com/vhlrar-mike-lindell-to-confront-brian-kemp-and-doug-ducey-over-election-fraud.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = "" + content, urlh = self._download_webpage_handle(url, video_id) + video_id = re.findall(r'"embedUrl":"https://rumble\.com/embed/(.*?)/"', content)[0] + + # this is + #video_id = self._match_id(url) video = self._download_json( 'https://rumble.com/embedJS/', video_id, query={'request': 'video', 'v': video_id}) From b71c25645248047529469cba66ce54ea9a589334 Mon Sep 17 00:00:00 2001 From: Grabien Date: Mon, 9 Aug 2021 12:26:05 +0300 Subject: [PATCH 2/3] - Rumble plugin cleanup --- youtube_dl/extractor/rumble.py | 65 ++-------------------------------- 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/rumble.py b/youtube_dl/extractor/rumble.py index 1aac82abb..ac3b3dfc7 100644 --- a/youtube_dl/extractor/rumble.py +++ b/youtube_dl/extractor/rumble.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str 
from ..utils import ( @@ -10,8 +12,6 @@ from ..utils import ( try_get, ) -import re - class RumbleEmbedIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?:embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)|(.*?)\.html)' @@ -75,64 +75,3 @@ class RumbleEmbedIE(InfoExtractor): 'channel_url': author.get('url'), 'duration': int_or_none(video.get('duration')), } - - -class RumbleRegularIE(RumbleEmbedIE): - _VALID_URL = r'https?://(?:www\.)?rumble\.com/(.*?)\.html' - _TESTS = [{ - 'url': 'https://rumble.com/vhlrar-mike-lindell-to-confront-brian-kemp-and-doug-ducey-over-election-fraud.html', - 'md5': '36a18a049856720189f30977ccbb2c34', - 'info_dict': { - 'id': 'v5pv5f', - 'ext': 'mp4', - 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', - 'timestamp': 1571611968, - 'upload_date': '20191020', - } - }, { - 'url': 'https://rumble.com/vhlrar-mike-lindell-to-confront-brian-kemp-and-doug-ducey-over-election-fraud.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = "" - content, urlh = self._download_webpage_handle(url, video_id) - video_id = re.findall(r'"embedUrl":"https://rumble\.com/embed/(.*?)/"', content)[0] - - # this is - #video_id = self._match_id(url) - video = self._download_json( - 'https://rumble.com/embedJS/', video_id, - query={'request': 'video', 'v': video_id}) - title = video['title'] - - formats = [] - for height, ua in (video.get('ua') or {}).items(): - for i in range(2): - f_url = try_get(ua, lambda x: x[i], compat_str) - if f_url: - ext = determine_ext(f_url) - f = { - 'ext': ext, - 'format_id': '%s-%sp' % (ext, height), - 'height': int_or_none(height), - 'url': f_url, - } - bitrate = try_get(ua, lambda x: x[i + 2]['bitrate']) - if bitrate: - f['tbr'] = int_or_none(bitrate) - formats.append(f) - self._sort_formats(formats) - - author = video.get('author') or {} - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': video.get('i'), - 'timestamp': parse_iso8601(video.get('pubDate')), - 
'channel': author.get('name'), - 'channel_url': author.get('url'), - 'duration': int_or_none(video.get('duration')), - } From c848132084f49bcd2887ac601b53c4ae3eebee1d Mon Sep 17 00:00:00 2001 From: Grabien Date: Fri, 13 Aug 2021 19:44:01 +0300 Subject: [PATCH 3/3] Rumble plugin improvements --- youtube_dl/extractor/rumble.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rumble.py b/youtube_dl/extractor/rumble.py index ac3b3dfc7..405c5a12d 100644 --- a/youtube_dl/extractor/rumble.py +++ b/youtube_dl/extractor/rumble.py @@ -14,7 +14,7 @@ from ..utils import ( class RumbleEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?:embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)|(.*?)\.html)' + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?:embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)|([0-9a-z]+)-(.*?)\.html)' _TESTS = [{ 'url': 'https://rumble.com/embed/v5pv5f', 'md5': '36a18a049856720189f30977ccbb2c34', @@ -34,10 +34,10 @@ class RumbleEmbedIE(InfoExtractor): }] def _real_extract(self, url): - if re.match(r'https?://(?:www\.)?rumble\.com/(.*?)\.html', url): - video_id = "" - content, urlh = self._download_webpage_handle(url, video_id) - video_id = re.findall(r'"embedUrl":"https://rumble\.com/embed/(.*?)/"', content)[0] + if re.match(r'https?://(?:www\.)?rumble\.com/([0-9a-z]+)-(.*?)\.html', url): + direct_video_id = re.findall(r'https?://(?:www\.)?rumble\.com/([0-9a-z]+)-(?:.*?)\.html', url)[0] + content = self._download_webpage(url, direct_video_id) + video_id = re.findall(r'"embedUrl"\s*:\s*"https://rumble\.com/embed/(.*?)/"', content)[0] else: video_id = self._match_id(url) video = self._download_json(