From e658853b98efb28a18f9ea596a42e79da1c55ff1 Mon Sep 17 00:00:00 2001 From: Sonic <47434066+TwoThousandHedgehogs@users.noreply.github.com> Date: Fri, 24 Dec 2021 06:49:33 -0500 Subject: [PATCH 1/4] Add extractor (#2045) Authored-by: TwoThousandHedgehogs, pukkandan --- youtube_dl/extractor/dropout.py | 212 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 + 2 files changed, 216 insertions(+) create mode 100644 youtube_dl/extractor/dropout.py diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py new file mode 100644 index 000000000..a7442d8f0 --- /dev/null +++ b/youtube_dl/extractor/dropout.py @@ -0,0 +1,212 @@ +# coding: utf-8 +from .common import InfoExtractor +from .vimeo import VHXEmbedIE +from ..utils import ( + clean_html, + ExtractorError, + get_element_by_class, + get_element_by_id, + get_elements_by_class, + int_or_none, + join_nonempty, + unified_strdate, + urlencode_postdata, +) + + +class DropoutIE(InfoExtractor): + _LOGIN_URL = 'https://www.dropout.tv/login' + _NETRC_MACHINE = 'dropout' + + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' + _TESTS = [ + { + 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', + 'note': 'Episode in a series', + 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a', + 'info_dict': { + 'id': '738153', + 'display_id': 'yes-or-no', + 'ext': 'mp4', + 'title': 'Yes or No', + 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?', + 'release_date': '20200508', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg', + 'series': 'Game Changer', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 6, + 'episode': 'Yes or No', + 'duration': 1180, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + 
{ + 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', + 'note': 'Episode in a series (missing release_date)', + 'md5': '712caf7c191f1c47c8f1879520c2fa5c', + 'info_dict': { + 'id': '320562', + 'display_id': 'episode-1', + 'ext': 'mp4', + 'title': 'The Beginning Begins', + 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg', + 'series': 'Dimension 20: Fantasy High', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'The Beginning Begins', + 'duration': 6838, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', + 'note': 'Episode not in a series', + 'md5': 'c30fa18999c5880d156339f13c953a26', + 'info_dict': { + 'id': '1915774', + 'display_id': 'misfits-magic-holiday-special', + 'ext': 'mp4', + 'title': 'Misfits & Magic Holiday Special', + 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.', + 'release_date': '20211215', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg', + 'duration': 11698, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + } + ] + + def _get_authenticity_token(self, display_id): + signin_page = self._download_webpage( + self._LOGIN_URL, display_id, note='Getting authenticity token') + return self._html_search_regex( + r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', + signin_page, 'authenticity_token') + + def _login(self, 
display_id): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required(method='password') + + response = self._download_webpage( + self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({ + 'email': username, + 'password': password, + 'authenticity_token': self._get_authenticity_token(display_id), + 'utf8': True + })) + + user_has_subscription = self._search_regex( + r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') + if user_has_subscription.lower() == 'true': + return response + elif user_has_subscription.lower() == 'false': + raise ExtractorError('Account is not subscribed') + else: + raise ExtractorError('Incorrect username/password') + + def _real_extract(self, url): + display_id = self._match_id(url) + try: + self._login(display_id) + webpage = self._download_webpage(url, display_id, note='Downloading video webpage') + finally: + self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') + + embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') + thumbnail = self._og_search_thumbnail(webpage) + watch_info = get_element_by_id('watch-info', webpage) or '' + + title = clean_html(get_element_by_class('video-title', watch_info)) + season_episode = get_element_by_class( + 'site-font-secondary-color', get_element_by_class('text', watch_info)) + episode_number = int_or_none(self._search_regex( + r'Episode (\d+)', season_episode or '', 'episode', default=None)) + + return { + '_type': 'url_transparent', + 'ie_key': VHXEmbedIE.ie_key(), + 'url': embed_url, + 'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'), + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta('description', webpage, fatal=False), + 'thumbnail': thumbnail.split('?')[0] if thumbnail else None, # Ignore crop/downscale + 'series': clean_html(get_element_by_class('series-title', 
watch_info)), + 'episode_number': episode_number, + 'episode': title if episode_number else None, + 'season_number': int_or_none(self._search_regex( + r'Season (\d+),', season_episode or '', 'season', default=None)), + 'release_date': unified_strdate(self._search_regex( + r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']', + watch_info, 'release date', default=None)), + } + + +class DropoutSeasonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)' + _TESTS = [ + { + 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', + 'note': 'Multi-season series with the season in the url', + 'playlist_count': 17, + 'info_dict': { + 'id': 'dimension-20-fantasy-high-season-1', + 'title': 'Dimension 20 Fantasy High - Season 1' + } + }, + { + 'url': 'https://www.dropout.tv/dimension-20-fantasy-high', + 'note': 'Multi-season series with the season not in the url', + 'playlist_count': 17, + 'info_dict': { + 'id': 'dimension-20-fantasy-high-season-1', + 'title': 'Dimension 20 Fantasy High - Season 1' + } + }, + { + 'url': 'https://www.dropout.tv/dimension-20-shriek-week', + 'note': 'Single-season series', + 'playlist_count': 4, + 'info_dict': { + 'id': 'dimension-20-shriek-week-season-1', + 'title': 'Dimension 20 Shriek Week - Season 1' + } + } + ] + + def _real_extract(self, url): + season_id = self._match_id(url) + season_title = season_id.replace('-', ' ').title() + webpage = self._download_webpage(url, season_id) + + entries = [ + self.url_result( + url=self._search_regex(r']+selected>([^<]+)', + seasons, 'current_season', default='').strip() + + return { + '_type': 'playlist', + 'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')), + 'title': join_nonempty(season_title, current_season, delim=' - '), + 'entries': entries + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50b7cb4a0..aaf0117d1 100644 --- 
a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -321,6 +321,10 @@ from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE +from .dropout import ( + DropoutSeasonIE, + DropoutIE +) from .dw import ( DWIE, DWArticleIE, From 4ced620d74b0f7735e847eadca7b2c9344e86eb3 Mon Sep 17 00:00:00 2001 From: KatDestroyer Date: Thu, 24 Feb 2022 22:08:31 +0100 Subject: [PATCH 2/4] Fix for #2858 --- youtube_dl/extractor/dropout.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py index a7442d8f0..5c68b2eb8 100644 --- a/youtube_dl/extractor/dropout.py +++ b/youtube_dl/extractor/dropout.py @@ -11,6 +11,7 @@ from ..utils import ( join_nonempty, unified_strdate, urlencode_postdata, + std_headers ) @@ -30,7 +31,7 @@ class DropoutIE(InfoExtractor): 'ext': 'mp4', 'title': 'Yes or No', 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?', - 'release_date': '20200508', + # 'release_date': '20200508', # Release dates seem to have been removed from the website 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg', 'series': 'Game Changer', 'season_number': 2, @@ -70,14 +71,14 @@ class DropoutIE(InfoExtractor): { 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', 'note': 'Episode not in a series', - 'md5': 'c30fa18999c5880d156339f13c953a26', + 'md5': '1cedb55910c0367c02d9d0aae524398e', 'info_dict': { 'id': '1915774', 'display_id': 'misfits-magic-holiday-special', 'ext': 'mp4', 'title': 'Misfits & Magic Holiday Special', 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.', - 'release_date': '20211215', + # 'release_date': '20211215', 'thumbnail': 
'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg', 'duration': 11698, 'uploader_id': 'user80538407', @@ -118,6 +119,8 @@ class DropoutIE(InfoExtractor): raise ExtractorError('Incorrect username/password') def _real_extract(self, url): + std_headers['Referer'] = 'https://www.dropout.tv' # See issue 2858 + display_id = self._match_id(url) try: self._login(display_id) From 41290c1cccf6aca3d4d0c17a551f3571d3e765fe Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 25 Feb 2022 13:46:40 +0000 Subject: [PATCH 3/4] Adapt Dropout extractors to yt-dl and add free test --- youtube_dl/extractor/dropout.py | 45 +++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py index 5c68b2eb8..7630a793d 100644 --- a/youtube_dl/extractor/dropout.py +++ b/youtube_dl/extractor/dropout.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( @@ -8,7 +10,6 @@ from ..utils import ( get_element_by_id, get_elements_by_class, int_or_none, - join_nonempty, unified_strdate, urlencode_postdata, std_headers @@ -21,6 +22,20 @@ class DropoutIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' _TESTS = [ + { + 'url': 'https://www.dropout.tv/dimension-20-misfits-and-magic/season:2/videos/misfits-and-magic-holiday-special-trailer', + 'note': 'No login required', + 'md5': 'cafb8d704af8da70134d70a97d966799', + 'info_dict': { + 'id': '1893157', + 'ext': 'mp4', + 'title': 'Dimension 20: Misfits and Magic Holiday Special Trailer', + 'description': 'Independent. Funny. 
Ad Free.', + 'uploader': 'OTT Videos', + 'uploader_id': 'user80538407', + }, + 'expected_warnings': ['No login information available'] + }, { 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', 'note': 'Episode in a series', @@ -43,7 +58,8 @@ class DropoutIE(InfoExtractor): 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos' }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'skip': 'Username and password required', }, { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', @@ -66,7 +82,8 @@ class DropoutIE(InfoExtractor): 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos' }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'skip': 'Username and password required', }, { 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', @@ -85,8 +102,9 @@ class DropoutIE(InfoExtractor): 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos' }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] - } + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'skip': 'Username and password required', + }, ] def _get_authenticity_token(self, display_id): @@ -99,7 +117,9 @@ class DropoutIE(InfoExtractor): def _login(self, display_id): username, password = self._get_login_info() if not (username and password): - self.raise_login_required(method='password') + # self.raise_login_required() + self.report_warning('No login information available', display_id) + return False response = self._download_webpage( self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({ @@ -123,10 +143,15 @@ class DropoutIE(InfoExtractor): display_id = self._match_id(url) try: - 
self._login(display_id) + logged_in = self._login(display_id) webpage = self._download_webpage(url, display_id, note='Downloading video webpage') + except ExtractorError: + if logged_in is False: + self.raise_login_required() + raise finally: - self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') + if logged_in is not False: + self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') thumbnail = self._og_search_thumbnail(webpage) @@ -207,6 +232,10 @@ class DropoutSeasonIE(InfoExtractor): current_season = self._search_regex(r']+selected>([^<]+)', seasons, 'current_season', default='').strip() + def join_nonempty(*args, **kwargs): + delim = kwargs.get('delim', '-') + return delim.join(x for x in args if x) + return { '_type': 'playlist', 'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')), From 457f046c3ca82a2123eac2a80518d87287d828ad Mon Sep 17 00:00:00 2001 From: KatDestroyer Date: Fri, 25 Feb 2022 15:56:36 +0100 Subject: [PATCH 4/4] Changed to use `_smuggle_referrer` Removed `std_headers` import to satisfy flake8 --- youtube_dl/extractor/dropout.py | 9 +++------ youtube_dl/extractor/vimeo.py | 9 ++++++++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/dropout.py b/youtube_dl/extractor/dropout.py index 7630a793d..8db4efcff 100644 --- a/youtube_dl/extractor/dropout.py +++ b/youtube_dl/extractor/dropout.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .vimeo import VHXEmbedIE +from .vimeo import VHXEmbedIE, VimeoIE from ..utils import ( clean_html, ExtractorError, @@ -11,8 +11,7 @@ from ..utils import ( get_elements_by_class, int_or_none, unified_strdate, - urlencode_postdata, - std_headers + urlencode_postdata ) @@ -139,8 +138,6 @@ class DropoutIE(InfoExtractor): raise ExtractorError('Incorrect 
username/password') def _real_extract(self, url): - std_headers['Referer'] = 'https://www.dropout.tv' # See issue 2858 - display_id = self._match_id(url) try: logged_in = self._login(display_id) @@ -166,7 +163,7 @@ class DropoutIE(InfoExtractor): return { '_type': 'url_transparent', 'ie_key': VHXEmbedIE.ie_key(), - 'url': embed_url, + 'url': VimeoIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'), 'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'), 'display_id': display_id, 'title': title, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 0b386f450..a77f7c7d4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1146,8 +1146,15 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): return unescapeHTML(mobj.group(1)) if mobj else None def _real_extract(self, url): + url, data = unsmuggle_url(url, {}) + headers = std_headers.copy() + if 'http_headers' in data: + headers.update(data['http_headers']) + if 'Referer' not in headers: + headers['Referer'] = url + video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, video_id, headers=headers) config_url = self._parse_json(self._search_regex( r'window\.OTTData\s*=\s*({.+})', webpage, 'ott data'), video_id, js_to_json)['config_url']