diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a4372912e..aa8026a32 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -569,7 +569,6 @@ - **ndr:embed** - **ndr:embed:base** - **NDTV** - - **Nebula** - **NerdCubedFeed** - **netease:album**: 网易云音乐 - 专辑 - **netease:djradio**: 网易云音乐 - 电台 diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9874441d5..1495eb5b4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -731,7 +731,12 @@ from .ndr import ( NJoyEmbedIE, ) from .ndtv import NDTVIE -from .nebula import NebulaIE +from .nebula import ( + NebulaIE, + NebulaChannelIE, + NebulaClassIE, + NebulaSubscriptionsIE, +) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE from .neteasemusic import ( diff --git a/youtube_dl/extractor/nebula.py b/youtube_dl/extractor/nebula.py index 5b3c2cbf7..3e55db939 100644 --- a/youtube_dl/extractor/nebula.py +++ b/youtube_dl/extractor/nebula.py @@ -1,320 +1,573 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import os +import itertools +from .art19 import Art19IE from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote, compat_str -from ..utils import parse_iso8601, ExtractorError, try_get, urljoin, sanitized_Request +from ..compat import ( + compat_HTTPError as HTTPError, + compat_kwargs, + compat_str as str, +) +from ..utils import ( + ExtractorError, + int_or_none, + json_stringify, + # make_archive_id, + merge_dicts, + parse_iso8601, + smuggle_url, + str_or_none, + T, + traverse_obj, + try_call, + unsmuggle_url, + update_url, + url_basename, + url_or_none, + urljoin, +) + +_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)' -class NebulaIE(InfoExtractor): - """ - Nebula (https://watchnebula.com/) is a video platform created by the streamer community Standard. 
It hosts videos - off-YouTube from a small hand-picked group of creators. - - All videos require a subscription to watch. There are no known freely available videos. An authentication token to - an account with a valid subscription can be specified in multiple ways, including credentials in .netrc or a cookie - jar. - As neither of these parameters appear to be supported by the unit test runner, it's recommended to set the envvar - NEBULA_TOKEN to execute the test runs. - - Nebula uses the Zype video infrastructure and this extractor is using the 'url_transparent' mode to hand off - video extraction to the Zype extractor. - - This description has been last updated on 2020-10-22. - """ - - _VALID_URL = r'https?://(?:www\.)?watchnebula\.com/videos/(?P<id>[-\w]+)' # the 'id' group is actually the display_id, but we misname it 'id' to be able to use _match_id() - _TESTS = [ - { - 'url': 'https://watchnebula.com/videos/that-time-disney-remade-beauty-and-the-beast', - 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e', - 'info_dict': { - 'id': '5c271b40b13fd613090034fd', - 'ext': 'mp4', - 'title': 'That Time Disney Remade Beauty and the Beast', - 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. 
We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.', - 'upload_date': '20180731', - 'timestamp': 1533009600, - 'channel': 'Lindsay Ellis', - 'uploader': 'Lindsay Ellis', - } - }, - { - 'url': 'https://watchnebula.com/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', - 'md5': '6d4edd14ce65720fa63aba5c583fb328', - 'info_dict': { - 'id': '5e7e78171aaf320001fbd6be', - 'ext': 'mp4', - 'title': 'Landing Craft - How The Allies Got Ashore', - 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', - 'upload_date': '20200327', - 'timestamp': 1585348140, - 'channel': 'The Logistics of D-Day', - 'uploader': 'The Logistics of D-Day', - } - }, - { - 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', - 'md5': '8c7d272910eea320f6f8e6d3084eecf5', - 'info_dict': { - 'id': '5e779ebdd157bc0001d1c75a', - 'ext': 'mp4', - 'title': 'Episode 1: The Draw', - 'description': r'contains:There’s free money on offer… if the players can all work together.', - 'upload_date': '20200323', - 'timestamp': 1584980400, - 'channel': 'Tom Scott Presents: Money', - 'uploader': 'Tom Scott Presents: Money', - } - }, - ] - _WORKING = True # FIXME: should this be set to False, to hide the tests from CI, given that the unit tests require an auth cookie of a (paid) subscription? +class NebulaBaseIE(InfoExtractor): _NETRC_MACHINE = 'watchnebula' + _token = _api_token = None - def _perform_login(self, username, password, video_id): - """ - Log in to Nebula, authenticating using a given username and password. + def _real_initialize(self): + self._login() - Returns a Nebula token, as the frontend would store it in the - nebula-auth cookie. Or False, if authentication fails. 
- """ - data = json.dumps({'email': username, 'password': password}).encode('utf8') - request = sanitized_Request(method='POST', - url='https://api.watchnebula.com/api/v1/auth/login/', - data=data, - headers={ - 'content-type': 'application/json', - # Overwrite the cookie headers, because - # submitting the 'sessionid' cookie - # always causes a 403 on auth endpoint - 'cookie': ''}) - response = self._download_json(request, fatal=False, video_id=video_id, - note='Authenticating to Nebula with supplied credentials', - errnote='Authentication failed or rejected') - if not response or 'key' not in response: - return False - return response['key'] + def _login(self): + if not self._api_token: + self._api_token = try_call( + lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value) + self._token = self._download_json( + 'https://users.api.nebula.app/api/v1/authorization/', None, + headers={'Authorization': 'Token {0}'.format(self._api_token)} if self._api_token else {}, + note='Authorizing to Nebula', data=b'')['token'] + if self._token: + return - def _retrieve_nebula_auth(self, video_id): - """ - Attempt to find a Nebula API token. Makes multiple attempts in the - following order: - a) login credentials used to authenticate to the Nebula login endpoint, - either from .netrc or specified using --username/--password - b) the --cookies supplied cookie jar - c) the NEBULA_TOKEN environment variable - d) the --video-password command line argument (this isn't documented in - the error message, because probably highly unpopular) - If none of these are successful, an end user-intended error message is - raised, listing some solutions. - - Returns a Nebula API token, which subsequently can be used to make - authenticated calls to the Nebula API. 
- """ - nebula_token = None - - # option #1: login credentials via .netrc or --username and --password username, password = self._get_login_info() - if username and password: - self.to_screen('Authenticating to Nebula using .netrc or command line-supplied credentials') - nebula_token = self._perform_login(username, password, video_id) + if username is None: + return + self._perform_login(username, password) - # option #2: nebula token via cookie jar - if not nebula_token: - # TODO: is there a helper to do all this cookie extraction? - nebula_cookies = self._get_cookies('https://watchnebula.com') - nebula_cookie = nebula_cookies.get('nebula-auth') - if nebula_cookie: - self.to_screen('Authenticating to Nebula with credentials from cookie jar') - nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value) - nebula_token = self._parse_json(nebula_cookie_value, video_id).get('apiToken') + def _perform_login(self, username, password): + try: + response = self._download_json( + 'https://nebula.tv/auth/login/', None, + 'Logging in to Nebula', 'Login failed', + data=json_stringify({'email': username, 'password': password}), + headers={'content-type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 400: + raise ExtractorError('Login failed: Invalid username or password', expected=True) + raise + self._api_token = traverse_obj(response, ('key', T(str))) + if not self._api_token: + raise ExtractorError('Login failed: No token') - # option #3: nebula token via environment variable - if not nebula_token and 'NEBULA_TOKEN' in os.environ: - nebula_token = os.environ.get('NEBULA_TOKEN') - if nebula_token: - self.to_screen('Authenticating to Nebula with token from NEBULA_TOKEN environment variable') + def _call_api(self, *args, **kwargs): - # option #4: nebula token via --videopassword - if not nebula_token: - nebula_token = self._downloader.params.get('videopassword') - if nebula_token: - 
self.to_screen('Authenticating to Nebula with token from --videopassword') + def kwargs_set_token(kw): + kw.setdefault('headers', {})['Authorization'] = 'Bearer {0}'.format(self._token) + return compat_kwargs(kw) - if not nebula_token: - raise ExtractorError('Nebula requires an account with an active subscription. ' - 'You can supply your authentication information by either ' - 'a) storing your credentials in .netrc or supplying them via --username and --password, or ' - 'b) passing in a cookie jar containing a nebula-auth cookie via --cookies, or ' - 'c) setting the environment variable NEBULA_TOKEN.') - return nebula_token + if self._token: + kwargs = kwargs_set_token(kwargs) + try: + return self._download_json(*args, **kwargs) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403): + raise + self.to_screen( + 'Reauthorizing with Nebula and retrying, because last API ' + 'call resulted in error {0}'.format(e.cause.status)) + self._real_initialize() + if self._token: + kwargs = kwargs_set_token(kwargs) + return self._download_json(*args, **kwargs) - def _retrieve_zype_api_key(self, page_url, display_id): - """ - Retrieves the Zype API key required to make calls to the Zype API. 
+ def _extract_formats(self, content_id, slug): + for retry in (False, True): + try: + # fmts, subs = self._extract_m3u8_formats_and_subtitles( + fmts, subs = self._extract_m3u8_formats( + 'https://content.api.nebula.app/{0}s/{1}/manifest.m3u8'.format( + content_id.split(':', 1)[0], content_id), + slug, 'mp4', query={ + 'token': self._token, + 'app_version': '23.10.0', + 'platform': 'ios', + }), {} + self._sort_formats(fmts) + return {'formats': fmts, 'subtitles': subs} + except ExtractorError as e: + if not isinstance(e.cause, HTTPError): + raise + if e.cause.status == 401: + self.raise_login_required() + if not retry and e.cause.status == 403: + self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error') + self._real_initialize() + continue + raise - Unfortunately, the Nebula frontend stores this as a JS object literal in one of its JS chunks, - looking somewhat like this (but minified): + def _extract_video_metadata(self, episode): + channel_url = traverse_obj( + episode, (('channel_slug', 'class_slug'), T(lambda u: urljoin('https://nebula.tv/', u))), get_all=False) + return merge_dicts({ + 'id': episode['id'].partition(':')[2], + 'title': episode['title'], + 'channel_url': channel_url, + 'uploader_url': channel_url, + }, traverse_obj(episode, { + 'display_id': 'slug', + 'description': 'description', + 'timestamp': ('published_at', T(parse_iso8601)), + 'duration': ('duration', T(int_or_none)), + 'channel_id': 'channel_slug', + 'uploader_id': 'channel_slug', + 'channel': 'channel_title', + 'uploader': 'channel_title', + 'series': 'channel_title', + 'creator': 'channel_title', + 'thumbnail': ('images', 'thumbnail', 'src', T(url_or_none)), + 'episode_number': ('order', {int_or_none}), + # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE + # '_old_archive_ids': ('zype_id', {lambda x: [ + # make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}), + })) - return { - NODE_ENV: 
"production", - REACT_APP_NAME: "Nebula", - REACT_APP_NEBULA_API: "https://api.watchnebula.com/api/v1/", - REACT_APP_ZYPE_API: "https://api.zype.com/", - REACT_APP_ZYPE_API_KEY: "", - REACT_APP_ZYPE_APP_KEY: "", - // ... - } - So we have to find the reference to the chunk in the video page (as it is hashed and the hash will - change when they do a new release), then download the chunk and extract the API key from there, - hoping they won't rename the constant. - - Alternatively, it is currently hardcoded and shared among all users. We haven't seen it - change so far, so we could also just hardcode it in the extractor as a fallback. - """ - # fetch the video page - webpage = self._download_webpage(page_url, video_id=display_id) - - # find the script tag with a file named 'main..chunk.js' in there - main_script_relpath = self._search_regex( - r']*src="(?P[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage, - group='script_relpath', name='script relative path', fatal=True) - - # fetch the JS chunk - main_script_abspath = urljoin(page_url, main_script_relpath) - main_script = self._download_webpage(main_script_abspath, video_id=display_id, - note='Retrieving Zype API key') - - # find the API key named 'REACT_APP_ZYPE_API_KEY' in there - api_key = self._search_regex( - r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P[\w-]*)"', main_script, - group='api_key', name='API key', fatal=True) - - return api_key - - def _call_zype_api(self, path, params, video_id, api_key, note): - """ - A helper for making calls to the Zype API. - """ - query = {'api_key': api_key, 'per_page': 1} - query.update(params) - return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note) - - def _fetch_zype_video_data(self, display_id, api_key): - """ - Fetch video meta data from the Zype API. 
- """ - response = self._call_zype_api('/videos', {'friendly_title': display_id}, - display_id, api_key, note='Retrieving metadata from Zype') - if 'response' not in response or len(response['response']) != 1: - raise ExtractorError('Unable to find video on Zype API') - return response['response'][0] - - def _call_nebula_api(self, path, video_id, access_token, note): - """ - A helper for making calls to the Nebula API. - """ - return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={ - 'Authorization': 'Token {access_token}'.format(access_token=access_token) - }, note=note) - - def _fetch_zype_access_token(self, video_id, nebula_token): - """ - Requests a Zype access token from the Nebula API. - """ - user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token') - access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str) - if not access_token: - if try_get(user_object, lambda x: x['is_subscribed'], bool): - raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint, please try loading an arbitrary video in a browser with this account to ''prime'' it for video downloading') - raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') - return access_token - - def _build_video_url(self, video_id, zype_access_token): - """ - Construct a Zype video URL (as supported by the Zype extractor), given a Zype video ID and a Zype access token. - """ - return 'https://player.zype.com/embed/{video_id}.html?access_token={access_token}'.format( - video_id=video_id, - access_token=zype_access_token) - - def _extract_channel(self, video_meta): - """ - Extract the channel title, by going through the list of categories and finding the first value of the - first category that has a value. - - I know this look like a terrible approach. 
But actually, it's just reproducing the behavior of the - React code the Nebula frontend uses (as of 2020-04-07): - - let channel; - if (video && video.categories && video.categories.length) { - const channelTitle = video.categories.map((category) => (category.value[0])) - .filter((title) => (!!title))[0]; - channel = getChannelByTitle(state, { title: channelTitle }); - } - - Basically, it finds the first (truthy) value in the category list and that's assumed to be the - channel title. And then the channel details (e.g. the URL) are looked up by title (!) (not by any - kind of ID) via an additional API call. - - TODO: Implement the API calls giving us the channel list, so that we can do the title lookup and then figure out the channel URL - - May return None of no category list could be found or no category had a label ('value'). - """ - categories = video_meta.get('categories', []) if video_meta else [] - for category in categories: - if category.get('value'): # we're intentionally not using "'value' in category" here, because the expression is supposed to be falsy for empty lists in category['value'] as well! - return category['value'][0] +class NebulaIE(NebulaBaseIE): + IE_NAME = 'nebula:video' + _VALID_URL = r'{0}/videos/(?P<id>[\w-]+)'.format(_BASE_URL_RE) + _TESTS = [{ + 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', + 'info_dict': { + 'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf', + 'ext': 'mp4', + 'title': 'That Time Disney Remade Beauty and the Beast', + 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. 
We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.', + 'upload_date': '20180731', + 'timestamp': 1533009600, + 'channel': 'Lindsay Ellis', + 'channel_id': 'lindsayellis', + 'uploader': 'Lindsay Ellis', + 'uploader_id': 'lindsayellis', + 'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis', + 'series': 'Lindsay Ellis', + 'display_id': 'that-time-disney-remade-beauty-and-the-beast', + 'channel_url': r're:https://nebula\.(tv|app)/lindsayellis', + 'creator': 'Lindsay Ellis', + 'duration': 2212, + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + # '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'], + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'md5': 'd05739cf6c38c09322422f696b569c23', + 'info_dict': { + 'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34', + 'ext': 'mp4', + 'title': 'Landing Craft - How The Allies Got Ashore', + 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', + 'upload_date': '20200327', + 'timestamp': 1585348140, + 'channel': 'Real Engineering — The Logistics of D-Day', + 'channel_id': 'd-day', + 'uploader': 'Real Engineering — The Logistics of D-Day', + 'uploader_id': 'd-day', + 'series': 'Real Engineering — The Logistics of D-Day', + 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'creator': 'Real Engineering — The Logistics of D-Day', + 'duration': 841, + 'channel_url': 'https://nebula.tv/d-day', + 'uploader_url': 'https://nebula.tv/d-day', + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + # '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'], + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + 'skip': 'Only available for 
registered users', + }, { + 'url': 'https://nebula.tv/videos/money-episode-1-the-draw', + 'md5': 'ebe28a7ad822b9ee172387d860487868', + 'info_dict': { + 'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553', + 'ext': 'mp4', + 'title': 'Episode 1: The Draw', + 'description': r'contains:There’s free money on offer… if the players can all work together.', + 'upload_date': '20200323', + 'timestamp': 1584980400, + 'channel': 'Tom Scott Presents: Money', + 'channel_id': 'tom-scott-presents-money', + 'uploader': 'Tom Scott Presents: Money', + 'uploader_id': 'tom-scott-presents-money', + 'uploader_url': 'https://nebula.tv/tom-scott-presents-money', + 'duration': 825, + 'channel_url': 'https://nebula.tv/tom-scott-presents-money', + 'series': 'Tom Scott Presents: Money', + 'display_id': 'money-episode-1-the-draw', + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + # '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'], + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + 'skip': 'Only available for registered users', + }, { + 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', + 'only_matching': True, + }, { + 'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines', + 'info_dict': { + 'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d', + 'ext': 'mp4', + 'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines', + 'title': 'Did the US Really Blow Up the NordStream Pipelines?', + 'description': 'md5:b4e2a14e3ff08f546a3209c75261e789', + 'upload_date': '20230223', + 'timestamp': 1677144070, + 'channel': 'TLDR News EU', + 'channel_id': 'tldrnewseu', + 'uploader': 'TLDR News EU', + 'uploader_id': 'tldrnewseu', + 'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu', + 'duration': 524, + 'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu', + 'series': 'TLDR News EU', + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + 
'creator': 'TLDR News EU', + # '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'], + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw', + 'only_matching': True, + }] def _real_extract(self, url): - # extract the video's display ID from the URL (we'll retrieve the video ID later) - display_id = self._match_id(url) + slug = self._match_id(url) + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('id'): + return merge_dicts({ + 'id': smuggled_data['id'], + 'display_id': slug, + 'title': '', + }, self._extract_formats(smuggled_data['id'], slug)) - # retrieve Nebula authentication information - nebula_token = self._retrieve_nebula_auth(display_id) + metadata = self._call_api( + 'https://content.api.nebula.app/content/videos/{0}'.format(slug), + slug, note='Fetching video metadata') + return merge_dicts( + self._extract_video_metadata(metadata), + self._extract_formats(metadata['id'], slug), + rev=True + ) - # fetch video meta data from the Nebula API - api_key = self._retrieve_zype_api_key(url, display_id) - video_meta = self._fetch_zype_video_data(display_id, api_key) - video_id = video_meta['_id'] - # extract additional info - channel_title = self._extract_channel(video_meta) +class NebulaClassIE(NebulaBaseIE): + IE_NAME = 'nebula:media' + _VALID_URL = r'{0}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE) + _TESTS = [{ + 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', + 'info_dict': { + 'id': 'd7432cdc-c608-474d-942c-f74345daed7b', + 'ext': 'mp4', + 'display_id': '14', + 'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit', + 'episode_number': 14, + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + 'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit', + 'duration': 646, + 'episode': 'Episode 14', + 'title': 'Photos, 
Sculpture, and Video', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + 'skip': 'Only available for registered users', + }, { + 'add_ies': [Art19IE], + 'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town', + 'info_dict': { + 'ext': 'mp3', + 'id': '83ef3b53-049e-4211-b34e-7bb518e67d64', + 'description': r"re:(?s)20 years ago, what was previously the Soviet Union's .{467}#do-not-sell-my-info\.$", + 'series_id': 'e0223cfc-f39c-4ad4-8724-bd8731bd31b5', + 'modified_timestamp': 1629410982, + 'episode_id': '83ef3b53-049e-4211-b34e-7bb518e67d64', + 'series': 'Extremities', + # 'modified_date': '20200903', + 'upload_date': '20200902', + 'title': 'Pyramiden: The High-Arctic Soviet Ghost Town', + 'release_timestamp': 1571237958, + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$', + 'duration': 1546.05714, + 'timestamp': 1599085555, + 'release_date': '20191016', + }, + }, { + 'url': 'https://nebula.tv/thelayover/the-layover-episode-1', + 'info_dict': { + 'ext': 'mp3', + 'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0', + 'episode_number': 1, + 'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$', + 'release_date': '20230304', + 'modified_date': '20230403', + 'series': 'The Layover', + 'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0', + 'modified_timestamp': 1680554566, + 'duration': 3130.46401, + 'release_timestamp': 1677943800, + 'title': 'The Layover — Episode 1', + 'series_id': '874303a5-4900-4626-a4b6-2aacac34466a', + 'upload_date': '20230303', + 'episode': 'Episode 1', + 'timestamp': 1677883672, + 'description': 'md5:002cca89258e3bc7c268d5b8c24ba482', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + 'skip': 'Only available for registered users', + }] - # fetch the access token for Zype, then construct the video URL - zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token) - video_url = 
self._build_video_url(video_id, zype_access_token) + def _real_extract(self, url): + slug, episode = self._match_valid_url(url).group('id', 'ep') + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('id'): + return merge_dicts({ + 'id': smuggled_data['id'], + 'display_id': slug, + 'title': '', + }, self._extract_formats(smuggled_data['id'], slug)) - return { - 'id': video_id, - 'display_id': display_id, + metadata = self._call_api( + 'https://content.api.nebula.app/content/{0}/{1}/?include=lessons'.format( + slug, episode), + slug, note='Fetching class/podcast metadata') + content_type = traverse_obj(metadata, 'type') + if content_type == 'lesson': + return merge_dicts( + self._extract_video_metadata(metadata), + self._extract_formats(metadata['id'], slug)) + elif content_type == 'podcast_episode': + episode_url = metadata.get('episode_url') + if not episode_url and metadata.get('premium'): + self.raise_login_required() - # we're passing this video URL on to the 'Zype' extractor (that's the video infrastructure that Nebula is - # built on top of) and use the 'url_transparent' type to indicate that our meta data should be better than - # whatever the Zype extractor is able to identify - '_type': 'url_transparent', - 'ie_key': 'Zype', - 'url': video_url, + if Art19IE.suitable(episode_url): + return self.url_result(episode_url, Art19IE.ie_key()) + return merge_dicts({ + 'id': metadata['id'], + 'title': metadata['title'], + }, traverse_obj(metadata, { + 'url': ('episode_url', T(url_or_none)), + 'description': ('description', T(str_or_none)), + 'timestamp': ('published_at', T(parse_iso8601)), + 'duration': ('duration', T(int_or_none)), + 'channel_id': ('channel_id', T(str_or_none)), + 'channel': ('channel_title', T(str_or_none)), + 'thumbnail': ('assets', 'regular', T(url_or_none)), + })) - # the meta data we were able to extract from Nebula - 'title': video_meta.get('title'), - 'description': video_meta.get('description'), - 'timestamp': 
parse_iso8601(video_meta.get('published_at')), - 'thumbnails': [ - { - 'id': tn.get('name'), # this appears to be null in all cases I've encountered - 'url': tn['url'], - 'width': tn.get('width'), - 'height': tn.get('height'), - } for tn in video_meta.get('thumbnails', [])], - 'duration': video_meta.get('duration'), - 'channel': channel_title, - 'uploader': channel_title, # we chose here to declare the channel name as the 'uploader' -- that's certainly arguable, as sometimes it's more of a series - # TODO: uploader_url: the video page clearly links to this (in the example case: /lindsayellis), but I cannot figure out where it gets it from! - # TODO: channel_id - # TODO: channel_url - } + raise ExtractorError('Unexpected content type {0!r}'.format(content_type)) + + +class NebulaPlaylistBaseIE(NebulaBaseIE): + _BASE_API_URL = 'https://content.api.nebula.app/' + _API_QUERY = {'ordering': '-published_at'} + + @classmethod + def _get_api_url(cls, item_id, path='/video_episodes/'): + return update_url(cls._BASE_API_URL, path=path, query_update=cls._API_QUERY) + + @staticmethod + def _get_episode_url(episode, episode_id): + return 'https://nebula.tv/videos/{0}'.format(episode_id) + + @classmethod + def url_result(cls, url, *args, **kwargs): + url_transparent = kwargs.pop('url_transparent', False) + smuggled_data = kwargs.pop('smuggled_data', None) + if smuggled_data: + url = smuggle_url(url, smuggled_data) + ie_key = args[0] if len(args) > 0 else kwargs.get('ie_key') + if not ie_key: + args = (NebulaIE.ie_key(),) + args + return merge_dicts( + {'_type': 'url_transparent'} if url_transparent else {}, + super(NebulaPlaylistBaseIE, cls).url_result(url, *args), + **kwargs) + + def _generate_playlist_entries(self, pl_id=None, slug=None, dl_note=None): + next_url = self._get_api_url(pl_id) + if dl_note is None: + dl_note = self.IE_NAME.rpartition(':')[::2] + if dl_note[0] and dl_note[1]: + dl_note = '{0} '.format(dl_note[1]) + else: + dl_note = '' + slug = slug or pl_id + for 
page_num in itertools.count(1): + episodes = self._call_api( + next_url, slug, note='Retrieving {0}page {1}'.format( + dl_note, page_num)) + for episode in traverse_obj(episodes, ('results', Ellipsis)): + metadata = self._extract_video_metadata(episode) + yield self.url_result( + self._get_episode_url(episode, metadata['display_id']), + smuggled_data={'id': episode['id']}, url_transparent=True, + **metadata) + next_url = episodes.get('next') + if not next_url: + break + + +class NebulaSubscriptionsIE(NebulaPlaylistBaseIE): + IE_NAME = 'nebula:subscriptions' + _VALID_URL = r'{0}/myshows'.format(_BASE_URL_RE) + _API_QUERY = { + 'following': 'true', + 'include': 'engagement', + 'ordering': '-published_at', + } + _TESTS = [{ + 'url': 'https://nebula.tv/myshows', + 'playlist_mincount': 1, + 'info_dict': { + 'id': 'myshows', + }, + 'skip': 'You must be logged in to find your subscriptions', + }] + + def _call_api(self, *args, **kwargs): + + try: + return super(NebulaSubscriptionsIE, self)._call_api(*args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 400: + self.raise_login_required('You must be logged in to find your subscriptions') + raise + + def _real_extract(self, url): + slug = url_basename(url) + return self.playlist_result(self._generate_playlist_entries(slug), slug) + + +class NebulaChannelIE(NebulaPlaylistBaseIE): + IE_NAME = 'nebula:channel' + _VALID_URL = r'{0}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE) + _TESTS = [{ + 'url': 'https://nebula.tv/tom-scott-presents-money', + 'info_dict': { + 'id': 'tom-scott-presents-money', + 'title': 'Tom Scott Presents: Money', + 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', + }, + 'playlist_count': 5, + }, { + 'url': 'https://nebula.tv/lindsayellis', + 'info_dict': { + 'id': 'lindsayellis', + 'title': 'Lindsay Ellis', + 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', + }, 
+ 'playlist_mincount': 2, + }, { + 'url': 'https://nebula.tv/johnnyharris', + 'info_dict': { + 'id': 'johnnyharris', + 'title': 'Johnny Harris', + 'description': 'I make videos about maps and many other things.', + }, + 'playlist_mincount': 90, + }, { + 'url': 'https://nebula.tv/copyright-for-fun-and-profit', + 'info_dict': { + 'id': 'copyright-for-fun-and-profit', + 'title': 'Copyright for Fun and Profit', + 'description': 'md5:6690248223eed044a9f11cd5a24f9742', + }, + 'playlist_count': 23, + }, { + 'url': 'https://nebula.tv/trussissuespodcast', + 'info_dict': { + 'id': 'trussissuespodcast', + 'title': 'Bite the Ballot', + 'description': 'md5:a08c4483bc0b705881d3e0199e721385', + }, + 'playlist_mincount': 80, + }] + + @classmethod + def _get_api_url(cls, item_id, path='/video_channels/{0}/video_episodes/'): + return super(NebulaChannelIE, cls)._get_api_url( + item_id, path=path.format(item_id)) + + @classmethod + def _get_episode_url(cls, episode, episode_id): + return ( + episode.get('share_url') + or super(NebulaChannelIE, cls)._get_episode_url(episode, episode_id)) + + def _generate_class_entries(self, channel): + for lesson in traverse_obj(channel, ('lessons', Ellipsis)): + metadata = self._extract_video_metadata(lesson) + yield self.url_result( + lesson.get('share_url') or 'https://nebula.tv/{0}/{1}'.format( + metadata['class_slug'], metadata['slug']), + smuggled_data={'id': lesson['id']}, url_transparent=True, + **metadata) + + def _generate_podcast_entries(self, collection_id, collection_slug): + next_url = 'https://content.api.nebula.app/podcast_channels/{0}/podcast_episodes/?ordering=-published_at&premium=true'.format( + collection_id) + for page_num in itertools.count(1): + episodes = self._call_api(next_url, collection_slug, note='Retrieving podcast page {0}'.format(page_num)) + + for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))): + yield self.url_result(episode['share_url'], NebulaClassIE) + next_url = 
episodes.get('next') + if not next_url: + break + + def _real_extract(self, url): + collection_slug = self._match_id(url) + channel = self._call_api( + 'https://content.api.nebula.app/content/{0}/?include=lessons'.format( + collection_slug), + collection_slug, note='Retrieving channel') + + channel_type = traverse_obj(channel, 'type') + if channel_type == 'class': + entries = self._generate_class_entries(channel) + elif channel_type == 'podcast_channel': + entries = self._generate_podcast_entries(channel['id'], collection_slug) + else: + entries = self._generate_playlist_entries(channel['id'], collection_slug) + + return self.playlist_result( + entries, + playlist_id=collection_slug, + playlist_title=channel.get('title'), + playlist_description=channel.get('description'))