[doodstream] add new extractor

This commit is contained in:
sxvghd 2021-01-29 19:01:36 +01:00 committed by schnusch
parent a800838f5a
commit 2b488a9179
3 changed files with 74 additions and 0 deletions

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import string
import random
import time
import re
from .common import InfoExtractor
class DoodStreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so)/[ed]/(?P<id>[a-z0-9]+)'
_TESTS = [{
'url': 'https://dood.to/d/wpyp2mgwi2kb',
'md5': '2aaf633bcd5fefb64b27344f55022bf9',
'info_dict': {
'id': 'wpyp2mgwi2kb',
'ext': 'mp4',
'title': 'Big Buck Bunny Trailer',
'thumbnail': r're:^https?://.*\.jpg$',
},
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?://)?dood\.(?:watch|to|so)/e/.+?)["\']',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if '/d/' in url:
url = "https://dood.to" + self._html_search_regex(
r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(['og:title', 'twitter:title'],
webpage, default=None)
thumb = self._html_search_meta(['og:image', 'twitter:image'],
webpage, default=None)
token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, default=None)
auth_url = 'https://dood.to' + self._html_search_regex(
r'(/pass_md5.*?)\'', webpage, 'pass_md5')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
'referer': url
}
webpage = self._download_webpage(auth_url, video_id, headers=headers)
final_url = webpage + ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
return {
'id': video_id,
'title': title,
'url': final_url,
'http_headers': headers,
'ext': 'mp4',
'description': description,
'thumbnail': thumb,
}

View File

@ -308,6 +308,7 @@ from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .dw import ( from .dw import (
DWIE, DWIE,

View File

@ -129,6 +129,7 @@ from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .doodstream import DoodStreamIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -3299,6 +3300,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
doodstream_urls = DoodStreamIE._extract_urls(webpage)
if doodstream_urls:
return self.playlist_from_matches(
doodstream_urls, video_id, video_title, ie=DoodStreamIE.ie_key())
sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer( sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)] webpage)]