Update PR with back-port from its development in yt-dlp

dirkf 2024-11-23 10:31:42 +00:00 committed by GitHub
parent a0f69f9526
commit ddbadd037f
3 changed files with 550 additions and 293 deletions


@@ -569,7 +569,6 @@
 - **ndr:embed**
 - **ndr:embed:base**
 - **NDTV**
-- **Nebula**
 - **NerdCubedFeed**
 - **netease:album**: 网易云音乐 - 专辑
 - **netease:djradio**: 网易云音乐 - 电台


@@ -731,7 +731,12 @@ from .ndr import (
     NJoyEmbedIE,
 )
 from .ndtv import NDTVIE
-from .nebula import NebulaIE
+from .nebula import (
+    NebulaIE,
+    NebulaChannelIE,
+    NebulaClassIE,
+    NebulaSubscriptionsIE,
+)
 from .nerdcubed import NerdCubedFeedIE
 from .netzkino import NetzkinoIE
 from .neteasemusic import (

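With the new classes registered in the extractor list above, the usual youtube-dl entry points dispatch Nebula URLs by pattern: nebula:video for /videos/ pages, nebula:media for class and podcast episodes, nebula:subscriptions for /myshows and nebula:channel for creator pages. A minimal illustrative invocation (not part of the patch; the credentials are placeholders and the options shown are ordinary YoutubeDL options):

import youtube_dl

opts = {
    'username': 'you@example.com',    # placeholder; the .netrc machine is 'watchnebula'
    'password': 'correct-horse',      # placeholder
    'skip_download': True,            # resolve metadata and formats only
}
with youtube_dl.YoutubeDL(opts) as ydl:
    # URL matched by the new nebula:video extractor (NebulaIE)
    info = ydl.extract_info(
        'https://nebula.tv/videos/money-episode-1-the-draw', download=False)
    print(info.get('title'), info.get('channel'), info.get('duration'))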

@@ -1,320 +1,573 @@
# coding: utf-8
from __future__ import unicode_literals

import itertools

from .art19 import Art19IE
from .common import InfoExtractor
from ..compat import (
    compat_HTTPError as HTTPError,
    compat_kwargs,
    compat_str as str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    json_stringify,
    # make_archive_id,
    merge_dicts,
    parse_iso8601,
    smuggle_url,
    str_or_none,
    T,
    traverse_obj,
    try_call,
    unsmuggle_url,
    update_url,
    url_basename,
    url_or_none,
    urljoin,
)

_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'

class NebulaBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'watchnebula'
    _token = _api_token = None

    def _real_initialize(self):
        self._login()

    def _login(self):
        if not self._api_token:
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
        self._token = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers={'Authorization': 'Token {0}'.format(self._api_token)} if self._api_token else {},
            note='Authorizing to Nebula', data=b'')['token']
        if self._token:
            return

        username, password = self._get_login_info()
        if username is None:
            return
        self._perform_login(username, password)

    def _perform_login(self, username, password):
        try:
            response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=json_stringify({'email': username, 'password': password}),
                headers={'content-type': 'application/json'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise ExtractorError('Login failed: Invalid username or password', expected=True)
            raise
        self._api_token = traverse_obj(response, ('key', T(str)))
        if not self._api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):

        def kwargs_set_token(kw):
            kw.setdefault('headers', {})['Authorization'] = 'Bearer {0}'.format(self._token)
            return compat_kwargs(kw)

        if self._token:
            kwargs = kwargs_set_token(kwargs)
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
                raise
            self.to_screen(
                'Reauthorizing with Nebula and retrying, because last API '
                'call resulted in error {0}'.format(e.cause.status))
            self._real_initialize()
            if self._token:
                kwargs = kwargs_set_token(kwargs)
            return self._download_json(*args, **kwargs)

    def _extract_formats(self, content_id, slug):
        for retry in (False, True):
            try:
                # fmts, subs = self._extract_m3u8_formats_and_subtitles(
                fmts, subs = self._extract_m3u8_formats(
                    'https://content.api.nebula.app/{0}s/{1}/manifest.m3u8'.format(
                        content_id.split(':', 1)[0], content_id),
                    slug, 'mp4', query={
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    }), {}
                self._sort_formats(fmts)
                return {'formats': fmts, 'subtitles': subs}
            except ExtractorError as e:
                if not isinstance(e.cause, HTTPError):
                    raise
                if e.cause.status == 401:
                    self.raise_login_required()
                if not retry and e.cause.status == 403:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise

    def _extract_video_metadata(self, episode):
        channel_url = traverse_obj(
            episode, (('channel_slug', 'class_slug'), T(lambda u: urljoin('https://nebula.tv/', u))), get_all=False)
        return merge_dicts({
            'id': episode['id'].partition(':')[2],
            'title': episode['title'],
            'channel_url': channel_url,
            'uploader_url': channel_url,
        }, traverse_obj(episode, {
            'display_id': 'slug',
            'description': 'description',
            'timestamp': ('published_at', T(parse_iso8601)),
            'duration': ('duration', T(int_or_none)),
            'channel_id': 'channel_slug',
            'uploader_id': 'channel_slug',
            'channel': 'channel_title',
            'uploader': 'channel_title',
            'series': 'channel_title',
            'creator': 'channel_title',
            'thumbnail': ('images', 'thumbnail', 'src', T(url_or_none)),
            'episode_number': ('order', {int_or_none}),
            # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
            # '_old_archive_ids': ('zype_id', {lambda x: [
            #     make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
        }))

class NebulaIE(NebulaBaseIE):
    IE_NAME = 'nebula:video'
    _VALID_URL = r'{0}/videos/(?P<id>[\w-]+)'.format(_BASE_URL_RE)
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            # '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            # '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
        'skip': 'Only available for registered users',
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            # '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
        'skip': 'Only available for registered users',
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'ext': 'mp4',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'duration': 524,
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            'creator': 'TLDR News EU',
            # '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        slug = self._match_id(url)
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return merge_dicts({
                'id': smuggled_data['id'],
                'display_id': slug,
                'title': '',
            }, self._extract_formats(smuggled_data['id'], slug))

        metadata = self._call_api(
            'https://content.api.nebula.app/content/videos/{0}'.format(slug),
            slug, note='Fetching video metadata')
        return merge_dicts(
            self._extract_video_metadata(metadata),
            self._extract_formats(metadata['id'], slug),
            rev=True
        )

class NebulaClassIE(NebulaBaseIE):
    IE_NAME = 'nebula:media'
    _VALID_URL = r'{0}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'ext': 'mp4',
            'display_id': '14',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'duration': 646,
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
        'skip': 'Only available for registered users',
    }, {
        'add_ies': [Art19IE],
        'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
        'info_dict': {
            'ext': 'mp3',
            'id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
            'description': r"re:(?s)20 years ago, what was previously the Soviet Union's .{467}#do-not-sell-my-info\.$",
            'series_id': 'e0223cfc-f39c-4ad4-8724-bd8731bd31b5',
            'modified_timestamp': 1629410982,
            'episode_id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
            'series': 'Extremities',
            # 'modified_date': '20200903',
            'upload_date': '20200902',
            'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
            'release_timestamp': 1571237958,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'duration': 1546.05714,
            'timestamp': 1599085555,
            'release_date': '20191016',
        },
    }, {
        'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
        'info_dict': {
            'ext': 'mp3',
            'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'episode_number': 1,
            'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
            'release_date': '20230304',
            'modified_date': '20230403',
            'series': 'The Layover',
            'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'modified_timestamp': 1680554566,
            'duration': 3130.46401,
            'release_timestamp': 1677943800,
            'title': 'The Layover — Episode 1',
            'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
            'upload_date': '20230303',
            'episode': 'Episode 1',
            'timestamp': 1677883672,
            'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': 'm3u8',
        },
        'skip': 'Only available for registered users',
    }]

    def _real_extract(self, url):
        slug, episode = self._match_valid_url(url).group('id', 'ep')
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return merge_dicts({
                'id': smuggled_data['id'],
                'display_id': slug,
                'title': '',
            }, self._extract_formats(smuggled_data['id'], slug))

        metadata = self._call_api(
            'https://content.api.nebula.app/content/{0}/{1}/?include=lessons'.format(
                slug, episode),
            slug, note='Fetching class/podcast metadata')
        content_type = traverse_obj(metadata, 'type')
        if content_type == 'lesson':
            return merge_dicts(
                self._extract_video_metadata(metadata),
                self._extract_formats(metadata['id'], slug))
        elif content_type == 'podcast_episode':
            episode_url = metadata.get('episode_url')
            if not episode_url and metadata.get('premium'):
                self.raise_login_required()

            if Art19IE.suitable(episode_url):
                return self.url_result(episode_url, Art19IE.ie_key())
            return merge_dicts({
                'id': metadata['id'],
                'title': metadata['title'],
            }, traverse_obj(metadata, {
                'url': ('episode_url', T(url_or_none)),
                'description': ('description', T(str_or_none)),
                'timestamp': ('published_at', T(parse_iso8601)),
                'duration': ('duration', T(int_or_none)),
                'channel_id': ('channel_id', T(str_or_none)),
                'channel': ('channel_title', T(str_or_none)),
                'thumbnail': ('assets', 'regular', T(url_or_none)),
            }))

        raise ExtractorError('Unexpected content type {0!r}'.format(content_type))

class NebulaPlaylistBaseIE(NebulaBaseIE):
    _BASE_API_URL = 'https://content.api.nebula.app/'
    _API_QUERY = {'ordering': '-published_at'}

    @classmethod
    def _get_api_url(cls, item_id, path='/video_episodes/'):
        return update_url(cls._BASE_API_URL, path=path, query_update=cls._API_QUERY)

    @staticmethod
    def _get_episode_url(episode, episode_id):
        return 'https://nebula.tv/videos/{0}'.format(episode_id)

    @classmethod
    def url_result(cls, url, *args, **kwargs):
        url_transparent = kwargs.pop('url_transparent', False)
        smuggled_data = kwargs.pop('smuggled_data', None)
        if smuggled_data:
            url = smuggle_url(url, smuggled_data)
        ie_key = args[0] if len(args) > 0 else kwargs.get('ie_key')
        if not ie_key:
            args = (NebulaIE.ie_key(),) + args
        return merge_dicts(
            {'_type': 'url_transparent'} if url_transparent else {},
            super(NebulaPlaylistBaseIE, cls).url_result(url, *args),
            **kwargs)

    def _generate_playlist_entries(self, pl_id=None, slug=None, dl_note=None):
        next_url = self._get_api_url(pl_id)
        if dl_note is None:
            dl_note = self.IE_NAME.rpartition(':')[::2]
            if dl_note[0] and dl_note[1]:
                dl_note = '{0} '.format(dl_note[1])
            else:
                dl_note = ''
        slug = slug or pl_id
        for page_num in itertools.count(1):
            episodes = self._call_api(
                next_url, slug, note='Retrieving {0}page {1}'.format(
                    dl_note, page_num))
            for episode in traverse_obj(episodes, ('results', Ellipsis)):
                metadata = self._extract_video_metadata(episode)
                yield self.url_result(
                    self._get_episode_url(episode, metadata['display_id']),
                    smuggled_data={'id': episode['id']}, url_transparent=True,
                    **metadata)
            next_url = episodes.get('next')
            if not next_url:
                break

class NebulaSubscriptionsIE(NebulaPlaylistBaseIE):
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = r'{0}/myshows'.format(_BASE_URL_RE)
    _API_QUERY = {
        'following': 'true',
        'include': 'engagement',
        'ordering': '-published_at',
    }
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
        'skip': 'You must be logged in to find your subscriptions',
    }]

    def _call_api(self, *args, **kwargs):
        try:
            return super(NebulaSubscriptionsIE, self)._call_api(*args, **kwargs)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                self.raise_login_required('You must be logged in to find your subscriptions')
            raise

    def _real_extract(self, url):
        slug = url_basename(url)
        return self.playlist_result(self._generate_playlist_entries(slug), slug)

class NebulaChannelIE(NebulaPlaylistBaseIE):
    IE_NAME = 'nebula:channel'
    _VALID_URL = r'{0}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }, {
        'url': 'https://nebula.tv/trussissuespodcast',
        'info_dict': {
            'id': 'trussissuespodcast',
            'title': 'Bite the Ballot',
            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
        },
        'playlist_mincount': 80,
    }]

    @classmethod
    def _get_api_url(cls, item_id, path='/video_channels/{0}/video_episodes/'):
        return super(NebulaChannelIE, cls)._get_api_url(
            item_id, path=path.format(item_id))

    @classmethod
    def _get_episode_url(cls, episode, episode_id):
        return (
            episode.get('share_url')
            or super(NebulaChannelIE, cls)._get_episode_url(episode, episode_id))

    def _generate_class_entries(self, channel):
        for lesson in traverse_obj(channel, ('lessons', Ellipsis)):
            metadata = self._extract_video_metadata(lesson)
            yield self.url_result(
                lesson.get('share_url') or 'https://nebula.tv/{0}/{1}'.format(
                    metadata['class_slug'], metadata['slug']),
                smuggled_data={'id': lesson['id']}, url_transparent=True,
                **metadata)

    def _generate_podcast_entries(self, collection_id, collection_slug):
        next_url = 'https://content.api.nebula.app/podcast_channels/{0}/podcast_episodes/?ordering=-published_at&premium=true'.format(
            collection_id)
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note='Retrieving podcast page {0}'.format(page_num))

            for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
                yield self.url_result(episode['share_url'], NebulaClassIE)
            next_url = episodes.get('next')
            if not next_url:
                break

    def _real_extract(self, url):
        collection_slug = self._match_id(url)
        channel = self._call_api(
            'https://content.api.nebula.app/content/{0}/?include=lessons'.format(
                collection_slug),
            collection_slug, note='Retrieving channel')

        channel_type = traverse_obj(channel, 'type')
        if channel_type == 'class':
            entries = self._generate_class_entries(channel)
        elif channel_type == 'podcast_channel':
            entries = self._generate_podcast_entries(channel['id'], collection_slug)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))
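For reference, the request flow that the NebulaBaseIE helpers implement can be sketched outside the extractor roughly as follows. This is an illustration only, written with the third-party requests library; the endpoints, header names and manifest query parameters are taken from the code above, while the token value and slug are placeholders.

import requests

# Placeholder: value of the 'nebula_auth.apiToken' cookie from a logged-in browser,
# or the 'key' returned by POSTing credentials to https://nebula.tv/auth/login/.
API_TOKEN = '...'

# 1. Exchange the long-lived API token for a short-lived Bearer token
#    (NebulaBaseIE._login).
bearer = requests.post(
    'https://users.api.nebula.app/api/v1/authorization/',
    headers={'Authorization': 'Token {0}'.format(API_TOKEN)},
    data=b'').json()['token']

# 2. Fetch video metadata for a slug (NebulaIE._real_extract).
slug = 'money-episode-1-the-draw'
meta = requests.get(
    'https://content.api.nebula.app/content/videos/{0}'.format(slug),
    headers={'Authorization': 'Bearer {0}'.format(bearer)}).json()

# 3. Build the HLS manifest URL that _extract_formats() feeds to
#    _extract_m3u8_formats(), using the same query parameters as the extractor.
manifest_url = 'https://content.api.nebula.app/{0}s/{1}/manifest.m3u8'.format(
    meta['id'].split(':', 1)[0], meta['id'])
manifest = requests.get(manifest_url, params={
    'token': bearer, 'app_version': '23.10.0', 'platform': 'ios'})
print(manifest.status_code)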
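The playlist extractors above avoid one metadata request per entry by smuggling the episode id into each url_transparent entry (smuggled_data={'id': ...}); NebulaIE and NebulaClassIE unsmuggle it and go straight to _extract_formats(). A tiny sketch of that mechanism using the same youtube_dl.utils helpers; the id value below is made up.

from youtube_dl.utils import smuggle_url, unsmuggle_url

entry_url = smuggle_url(
    'https://nebula.tv/videos/money-episode-1-the-draw',
    {'id': 'video_episode:00000000-0000-0000-0000-000000000000'})  # hypothetical content id

url, data = unsmuggle_url(entry_url, {})
print(url)         # the original URL, as NebulaIE._real_extract() sees it
print(data['id'])  # the smuggled content id, used directly for the manifest request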