From 847a6464a8a1c3b6848d3cdb26f19c4cf79dd10e Mon Sep 17 00:00:00 2001 From: schnusch Date: Sat, 24 Apr 2021 22:10:05 +0200 Subject: [PATCH] [doodstream] fix and add more metadata * metadata * fetch and decrypt video title * implemented doodExe * add filesize and duration * removed description * use _og_search_thumbnail * remove User-Agent from HTTP headers because it is not needed --- youtube_dl/extractor/doodstream.py | 60 +++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py index ddddd717a..cfc5ebafd 100644 --- a/youtube_dl/extractor/doodstream.py +++ b/youtube_dl/extractor/doodstream.py @@ -7,10 +7,28 @@ import time from .common import InfoExtractor from ..utils import ( + js_to_json, urljoin, ) +def doodExe(crp, crs): + if crp == 'N_crp': + return crs + sorted_crp = ''.join(sorted(crp)) + result = '' + for c in crs: + i = crp.find(c) + if i >= 0: + result += sorted_crp[i] + result = result.replace('+.+', '(') + result = result.replace('+..+', ')') + result = result.replace('+-+', '[') + result = result.replace('+--+', ']') + result = result.replace('+', ' ') + return result + + class DoodStreamIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:doodstream\.com|dood\.(?:so|to|watch))/[de]/(?P[^/?#]+)' _TESTS = [{ @@ -21,6 +39,8 @@ class DoodStreamIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Buck Bunny Trailer', 'thumbnail': r're:^https?://.*\.jpg$', + 'filesize': 4447915, + 'duration': 33, }, }] @@ -28,32 +48,44 @@ class DoodStreamIE(InfoExtractor): video_id = self._match_id(url) url = urljoin(url, '/e/' + video_id) + referer = {'Referer': url} webpage = self._download_webpage(url, video_id) - title = self._html_search_meta(['og:title', 'twitter:title'], - webpage, default=None) - thumb = self._html_search_meta(['og:image', 'twitter:image'], - webpage, default=None) + metadata_url = self._html_search_regex(r"('/cptr/[^']*')", webpage, + 'video metadata') + metadata_url = self._parse_json(metadata_url, video_id, + transform_source=js_to_json) + metadata_url = urljoin(url, metadata_url) + metadata = self._download_json(metadata_url, video_id, headers=referer) + + thumb = self._og_search_thumbnail(webpage) + try: + filesize = int(doodExe(**metadata['siz']), 10) + except (KeyError, ValueError): + filesize = None + try: + duration = int(doodExe(**metadata['len']), 10) + except (KeyError, ValueError): + duration = None + try: + title = doodExe(**metadata['ttl']) + except KeyError: + title = video_id + token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token') - description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], - webpage, default=None) auth_url = 'https://dood.to' + self._html_search_regex( r'(/pass_md5.*?)\'', webpage, 'pass_md5') - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', - 'referer': url - } - webpage = self._download_webpage(auth_url, video_id, headers=headers) + webpage = self._download_webpage(auth_url, video_id, headers=referer) final_url = webpage + ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000)) return { 'id': video_id, 'title': title, 'url': final_url, - 'http_headers': headers, + 'http_headers': referer, 'ext': 'mp4', - 'description': description, 'thumbnail': thumb, + 'filesize': filesize, + 'duration': duration, }