This commit is contained in:
dirkf 2024-08-21 12:56:57 -05:00 committed by GitHub
commit 1d6e97466c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 253 additions and 1 deletions

View File

@ -0,0 +1,241 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .vimeo import VHXEmbedIE, VimeoIE
from ..utils import (
clean_html,
ExtractorError,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
int_or_none,
unified_strdate,
urlencode_postdata
)
class DropoutIE(InfoExtractor):
_LOGIN_URL = 'https://www.dropout.tv/login'
_NETRC_MACHINE = 'dropout'
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-misfits-and-magic/season:2/videos/misfits-and-magic-holiday-special-trailer',
'note': 'No login required',
'md5': 'cafb8d704af8da70134d70a97d966799',
'info_dict': {
'id': '1893157',
'ext': 'mp4',
'title': 'Dimension 20: Misfits and Magic Holiday Special Trailer',
'description': 'Independent. Funny. Ad Free.',
'uploader': 'OTT Videos',
'uploader_id': 'user80538407',
},
'expected_warnings': ['No login information available']
},
{
'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
'note': 'Episode in a series',
'md5': '5e000fdfd8d8fa46ff40456f1c2af04a',
'info_dict': {
'id': '738153',
'display_id': 'yes-or-no',
'ext': 'mp4',
'title': 'Yes or No',
'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
# 'release_date': '20200508', # Release dates seem to have been removed from the website
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
'series': 'Game Changer',
'season_number': 2,
'season': 'Season 2',
'episode_number': 6,
'episode': 'Yes or No',
'duration': 1180,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'skip': 'Username and password required',
},
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1',
'note': 'Episode in a series (missing release_date)',
'md5': '712caf7c191f1c47c8f1879520c2fa5c',
'info_dict': {
'id': '320562',
'display_id': 'episode-1',
'ext': 'mp4',
'title': 'The Beginning Begins',
'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg',
'series': 'Dimension 20: Fantasy High',
'season_number': 1,
'season': 'Season 1',
'episode_number': 1,
'episode': 'The Beginning Begins',
'duration': 6838,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'skip': 'Username and password required',
},
{
'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
'note': 'Episode not in a series',
'md5': '1cedb55910c0367c02d9d0aae524398e',
'info_dict': {
'id': '1915774',
'display_id': 'misfits-magic-holiday-special',
'ext': 'mp4',
'title': 'Misfits & Magic Holiday Special',
'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
# 'release_date': '20211215',
'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
'duration': 11698,
'uploader_id': 'user80538407',
'uploader_url': 'https://vimeo.com/user80538407',
'uploader': 'OTT Videos'
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
'skip': 'Username and password required',
},
]
def _get_authenticity_token(self, display_id):
signin_page = self._download_webpage(
self._LOGIN_URL, display_id, note='Getting authenticity token')
return self._html_search_regex(
r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
signin_page, 'authenticity_token')
def _login(self, display_id):
username, password = self._get_login_info()
if not (username and password):
# self.raise_login_required()
self.report_warning('No login information available', display_id)
return False
response = self._download_webpage(
self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
'email': username,
'password': password,
'authenticity_token': self._get_authenticity_token(display_id),
'utf8': True
}))
user_has_subscription = self._search_regex(
r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
if user_has_subscription.lower() == 'true':
return response
elif user_has_subscription.lower() == 'false':
raise ExtractorError('Account is not subscribed')
else:
raise ExtractorError('Incorrect username/password')
def _real_extract(self, url):
display_id = self._match_id(url)
try:
logged_in = self._login(display_id)
webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
except ExtractorError:
if logged_in is False:
self.raise_login_required()
raise
finally:
if logged_in is not False:
self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out')
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
thumbnail = self._og_search_thumbnail(webpage)
watch_info = get_element_by_id('watch-info', webpage) or ''
title = clean_html(get_element_by_class('video-title', watch_info))
season_episode = get_element_by_class(
'site-font-secondary-color', get_element_by_class('text', watch_info))
episode_number = int_or_none(self._search_regex(
r'Episode (\d+)', season_episode or '', 'episode', default=None))
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
'url': VimeoIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage, fatal=False),
'thumbnail': thumbnail.split('?')[0] if thumbnail else None, # Ignore crop/downscale
'series': clean_html(get_element_by_class('series-title', watch_info)),
'episode_number': episode_number,
'episode': title if episode_number else None,
'season_number': int_or_none(self._search_regex(
r'Season (\d+),', season_episode or '', 'season', default=None)),
'release_date': unified_strdate(self._search_regex(
r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']',
watch_info, 'release date', default=None)),
}
class DropoutSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
'note': 'Multi-season series with the season in the url',
'playlist_count': 17,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
}
},
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
'note': 'Multi-season series with the season not in the url',
'playlist_count': 17,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
}
},
{
'url': 'https://www.dropout.tv/dimension-20-shriek-week',
'note': 'Single-season series',
'playlist_count': 4,
'info_dict': {
'id': 'dimension-20-shriek-week-season-1',
'title': 'Dimension 20 Shriek Week - Season 1'
}
}
]
def _real_extract(self, url):
season_id = self._match_id(url)
season_title = season_id.replace('-', ' ').title()
webpage = self._download_webpage(url, season_id)
entries = [
self.url_result(
url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
item, 'item_url'),
ie=DropoutIE.ie_key()
) for item in get_elements_by_class('js-collection-item', webpage)
]
seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
seasons, 'current_season', default='').strip()
def join_nonempty(*args, **kwargs):
delim = kwargs.get('delim', '-')
return delim.join(x for x in args if x)
return {
'_type': 'playlist',
'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
'title': join_nonempty(season_title, current_season, delim=' - '),
'entries': entries
}

View File

@ -335,6 +335,10 @@ from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .dropout import (
DropoutSeasonIE,
DropoutIE
)
from .dw import ( from .dw import (
DWIE, DWIE,
DWArticleIE, DWArticleIE,

View File

@ -1171,8 +1171,15 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
return unescapeHTML(mobj.group(1)) if mobj else None return unescapeHTML(mobj.group(1)) if mobj else None
def _real_extract(self, url): def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
headers = std_headers.copy()
if 'http_headers' in data:
headers.update(data['http_headers'])
if 'Referer' not in headers:
headers['Referer'] = url
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id, headers=headers)
config_url = self._parse_json(self._search_regex( config_url = self._parse_json(self._search_regex(
r'window\.OTTData\s*=\s*({.+})', webpage, r'window\.OTTData\s*=\s*({.+})', webpage,
'ott data'), video_id, js_to_json)['config_url'] 'ott data'), video_id, js_to_json)['config_url']