# coding: utf-8
from __future__ import unicode_literals

# NOTE: this extractor also has to be registered in
# youtube_dl/extractor/extractors.py, next to the UKTVPlayIE import:
#     from .ukdevilz import UKDevilzIE

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    int_or_none,
)


class UKDevilzIE(InfoExtractor):
    # Some watch URLs have a '-' in front of the numeric id pair and some do
    # not; the dash is matched but is not part of the extracted video id.
    _VALID_URL = r'https?://(?:www\.)?ukdevilz\.com/watch/-?(?P<id>\d+_\d+)'
    IE_DESC = 'UKDEVILZ'
    _TEST = {
        'url': 'https://ukdevilz.com/watch/-160418850_456239050',
        'md5': 'fe608143263af08b0160932561ed1a8a',
        'info_dict': {
            'id': '160418850_456239050',
            'ext': 'mp4',
            'title': 'Twix hot beverage',
            'description': 'md5:7c941f7c8ae9c83d06a6cea1722ae859',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _extract_formats(self, sources, base_url):
        """Build the formats list from the playlist's ``sources`` entries.

        sources  -- list of dicts read from the playlist JSON; the keys used
                    here are 'file' (URL), 'type' ('hls' or a file extension)
                    and 'label' (numeric quality label).
        base_url -- URL that relative 'file' values are resolved against.

        Returns a list of format dicts (unsorted). Entries that are not
        dicts or that carry no 'file' are skipped instead of raising.
        """
        sources = [s for s in sources or [] if isinstance(s, dict)]

        # The playlist has no info about the width and file extension of the
        # HLS stream. The HLS stream seems to always be the highest quality
        # of the other streams, so borrow both values from the best labelled
        # source.
        best_width = 0
        hls_ext = 'mp4'  # sane container default when nothing is labelled
        for source in sources:
            width = int_or_none(source.get('label'))
            if width and width > best_width:
                best_width = width
                hls_ext = source.get('type') or hls_ext

        formats = []
        for source in sources:
            file_url = source.get('file')
            if not file_url:
                continue
            is_hls = source.get('type') == 'hls'
            fmt = {
                # urljoin leaves absolute URLs untouched and resolves
                # relative ones, so no startswith('/') special case is needed
                'url': compat_urlparse.urljoin(base_url, file_url),
                'ext': hls_ext if is_hls else source.get('type'),
                # NOTE(review): the label is stored as 'width' as in the
                # original change; confirm it is not actually a height
                'width': int_or_none(source.get('label')) or best_width or None,
            }
            if is_hls:
                fmt['protocol'] = 'm3u8'
            # For plain HTTP(S) sources 'protocol' is left unset so that it
            # is derived from the (now absolute) URL.
            formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Extract the info dict for a ukdevilz.com watch page.

        Flow: watch page -> embedded player iframe -> playlist JSON URL
        (assigned to window.playlistUrl inside the iframe) -> 'sources'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The iframe src may be protocol-relative; resolve it against the
        # page URL so the page's scheme is kept.
        iframe_url = compat_urlparse.urljoin(url, self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<iFrameUrl>(?:https?:)?//ukdevilz\.com/player/(?:(?!\1).)+)\1',
            webpage, 'iFrameUrl', group='iFrameUrl'))
        iframe = self._download_webpage(
            iframe_url, video_id, note='Downloading player iframe')

        playlist_url = compat_urlparse.urljoin(url, self._search_regex(
            r'window\.playlistUrl\s*=\s*["\'](?P<playlistUrl>[^"\']+)',
            iframe, 'playlistUrl', group='playlistUrl'))
        playlist = self._download_json(
            playlist_url, video_id, note='Downloading playlist JSON')

        formats = self._extract_formats(playlist.get('sources'), url)
        self._sort_formats(formats)

        # NOTE(review): the tag name of the description element was lost in
        # the submitted patch; any element with class="description" is
        # accepted here - confirm against the live page markup.
        description = (
            self._search_regex(
                r'(?s)<(?P<tag>[a-z][a-z0-9]*)[^>]+\bclass=["\']description["\'][^>]*>(?P<description>.+?)</(?P=tag)>',
                webpage, 'description', default=None, group='description')
            or self._og_search_description(webpage))

        # 'tags' must be a list of strings, the meta tag holds one
        # comma-separated string.
        keywords = self._html_search_meta('keywords', webpage)
        tags = None
        if keywords:
            tags = [tag.strip() for tag in keywords.split(',') if tag.strip()]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': description,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'tags': tags or None,
        }