From 503406d4bc838b51c9b1adf0d3fd4a9efda26d30 Mon Sep 17 00:00:00 2001
From: lkho
Date: Fri, 28 Aug 2020 23:44:50 +0800
Subject: [PATCH] [duboku] Add new extractor www.duboku.co

---
 youtube_dl/extractor/duboku.py     | 105 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 2 files changed, 106 insertions(+)
 create mode 100644 youtube_dl/extractor/duboku.py

diff --git a/youtube_dl/extractor/duboku.py b/youtube_dl/extractor/duboku.py
new file mode 100644
index 000000000..3e4cf8d5b
--- /dev/null
+++ b/youtube_dl/extractor/duboku.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    extract_attributes,
+    get_elements_by_class,
+    int_or_none,
+    js_to_json,
+    smuggle_url,
+)
+
+
+class DubokuIE(InfoExtractor):
+    """Extract a single episode from a www.duboku.co /vodplay/ page."""
+
+    _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9\-]+)\.html.*'
+    _TESTS = [{
+        'url': 'https://www.duboku.co/vodplay/1575-1-1.html',
+        'info_dict': {
+            'id': '1575-1-1',
+            'title': '白色月光',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'params': {
+            'skip_download': 'm3u8 download',
+        },
+    }]
+
+    # The page assigns the player configuration, a JS object literal, to
+    # ``player_data`` inside a <script> element.
+    _PLAYER_DATA_PATTERN = r'player_data\s*=\s*(\{\s*(.*)})\s*;?\s*</script'
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode page at ``url``.
+
+        Raises ExtractorError if the id is not <series>-<season>-<episode>
+        or if the page's player data carries no stream url.
+        """
+        video_id = self._match_id(url)
+        id_parts = video_id.split('-')
+        if len(id_parts) < 3:
+            raise ExtractorError(
+                'Unexpected video id format: %s' % video_id, expected=True)
+        series_id, season_id, episode_id = id_parts[:3]
+
+        webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id
+        webpage_html = self._download_webpage(webpage_url, video_id)
+
+        # extract video url
+        player_data = self._parse_json(
+            self._search_regex(
+                self._PLAYER_DATA_PATTERN, webpage_html, 'player_data'),
+            video_id, js_to_json)
+
+        # extract title: pick the "title" element whose link points back to
+        # this series (/<series_id>.html)
+        series_title = None
+        title = None
+        for html in get_elements_by_class('title', webpage_html):
+            mobj = re.search(r'<a\s+.*>(.*)</a>', html)
+            if not mobj:
+                continue
+            href = extract_attributes(mobj.group(0)).get('href')
+            if not href:
+                continue
+            mobj1 = re.search(r'/(\d+)\.html', href)
+            if mobj1 and mobj1.group(1) == series_id:
+                series_title = re.sub(r'\s+', ' ', clean_html(mobj.group(0)))
+                title = re.sub(r'\s+', ' ', clean_html(html))
+                break
+
+        data_url = player_data.get('url')
+        if not data_url:
+            # not an assert: assertions are stripped when running with -O
+            raise ExtractorError('Cannot find url in player_data')
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'series': series_title,
+            'season_number': int_or_none(season_id),
+            'season_id': season_id,
+            'episode_number': int_or_none(episode_id),
+            'episode_id': episode_id,
+        }
+
+        # if it is an embedded iframe, maybe it's an external source
+        if player_data.get('from') == 'iframe':
+            # use _type url_transparent to retain the meaningful details
+            # of the video.
+            info.update({
+                '_type': 'url_transparent',
+                'url': smuggle_url(
+                    data_url, {'http_headers': {'Referer': webpage_url}}),
+            })
+            return info
+
+        info['formats'] = self._extract_m3u8_formats(data_url, video_id, 'ts')
+        return info
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..e6c008b6f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -282,6 +282,7 @@ from .drtv import (
 )
 from .dtube import DTubeIE
 from .dvtv import DVTVIE
+from .duboku import DubokuIE
 from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE