Compare commits

...

8 Commits

Author SHA1 Message Date
Nikhil Chelliah
89896d2c58
Merge ce604ae7dce406aaf8fde8f25ee40bd390255b58 into 673277e510ebd996b62a2fcc76169bf3cce29910 2025-03-08 23:46:08 +00:00
dirkf
673277e510
[YouTube] Fix 91b1569 2025-02-28 01:02:20 +00:00
dirkf
91b1569f68
[YouTube] Fix channel playlist extraction (#33074)
* [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_...
* resolves #33073
* thx seproDev (yt-dlp/yt-dlp#11615)

Co-authored-by: sepro <sepro@sepr0.com>
2025-02-28 00:02:10 +00:00
nikhil
ce604ae7dc Support _non_-tokenized source URLs too 2021-08-03 00:16:49 -04:00
nikhil
b675a6e6b9 Satisfy flake8, coding conventions, tests 2021-08-02 23:27:43 -04:00
nikhil
b511872fbf Unset ffmpeg's -seekable, -http_seekable, and -icy flags 2021-08-02 20:39:20 -04:00
nikhil
236e3d30ba More fixes 2021-08-01 18:45:06 -04:00
nikhil
e7f4793d4d [extractor/nbc] Fix NBC Olympics extractor 2021-07-29 11:31:27 -04:00
3 changed files with 136 additions and 27 deletions

View File

@ -393,6 +393,19 @@ class FFmpegFD(ExternalFD):
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
http_seekable = info_dict.get('_http_seekable')
if http_seekable is not None:
# setting -http_seekable prevents ffmpeg from guessing if the server
# supports seeking in other kinds of requests (by adding the same header
# as above: `Range: bytes=0-`)
args += ['-http_seekable', '1' if http_seekable else '0']
icy = info_dict.get('_icy')
if icy is not None:
# setting -icy 0 prevents ffmpeg from sending the header `Icy-Metadata: 1`,
# which can also cause problems
# https://github.com/ytdl-org/youtube-dl/pull/29688
# https://trac.ffmpeg.org/ticket/5460#comment:5
args += ['-icy', '1' if icy else '0']
args += self._configuration_args()

View File

@ -477,43 +477,94 @@ class NBCOlympicsIE(InfoExtractor):
class NBCOlympicsStreamIE(AdobePassIE):
IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
_TEST = {
'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
_TESTS = [
# "Tokenized" .m3u8 source URL
{
'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
'info_dict': {
'id': '203493',
'id': '2019740',
'ext': 'mp4',
'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
},
'params': {
# m3u8 download
'skip_download': True,
},
}
},
# Plain .m3u8 source URL
{
'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
'info_dict': {
'id': '2021729',
'ext': 'mp4',
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
]
_DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
_LEAP_URL_TEMPLATE = 'https://api-leap.nbcsports.com/feeds/assets/%s?application=NBCOlympics&platform=%s&format=nbc-player&env=staging'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
resource = self._search_regex(
r"resource\s*=\s*'(.+)';", webpage,
'resource').replace("' + pid + '", pid)
event_config = self._download_json(
self._DATA_URL_TEMPLATE % ('event_config', pid),
pid)['eventConfig']
pid,
'Downloading event config',
)['eventConfig']
resource = event_config.get('resourceId', 'NBCOlympics')
title = self._live_title(event_config['eventTitle'])
source_url = self._download_json(
self._DATA_URL_TEMPLATE % ('live_sources', pid),
pid)['videoSources'][0]['sourceUrl']
leap_config = self._download_json(
self._LEAP_URL_TEMPLATE % (pid, 'desktop'),
pid,
'Downloading leap config',
)
source_url = leap_config['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
ap_resource = self._get_mvpd_resource(
resource,
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']),
pid,
event_config.get('ratingId', 'NO VALUE'),
)
media_token = self._extract_mvpd_auth(
url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
formats = self._extract_m3u8_formats(self._download_webpage(
'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
if event_config.get('cdnToken') is True:
source_url = self._download_json(
'https://tokens.playmakerservices.com/',
pid,
'Retrieving tokenized URL',
data=json.dumps({
'application': 'NBCSports',
'authentication-type': 'adobe-pass',
'cdn': 'akamai',
'mediaToken': base64.b64encode(media_token.encode()),
'resource': base64.b64encode(resource.encode()),
# Indicates that the player communicates its token not via the path
# but via a cookie? NBC's player specifies `'false'`, but the field
# doesn't seem to have any effect.
# 'inPath': 'false',
'pid': pid,
'platform': 'desktop',
'requestorId': 'NBCOlympics',
'resourceId': base64.b64encode(ap_resource.encode()).decode(),
'token': base64.b64encode(media_token.encode()).decode(),
'url': source_url,
}), pid, 'mp4')
'version': 'v1',
}).encode(),
)['akamai'][0]['tokenizedUrl']
formats = self._extract_m3u8_formats(source_url, pid, 'mp4')
for f in formats:
f['_seekable'] = False
f['_http_seekable'] = False
f['_icy'] = False
self._sort_formats(formats)
return {

View File

@ -27,6 +27,7 @@ from ..compat import (
)
from ..jsinterp import JSInterpreter
from ..utils import (
bug_reports_message,
clean_html,
dict_get,
error_to_compat_str,
@ -65,6 +66,7 @@ from ..utils import (
url_or_none,
urlencode_postdata,
urljoin,
variadic,
)
@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader,
}
@staticmethod
def _extract_thumbnails(data, *path_list, **kw_final_key):
    """
    Extract thumbnails from thumbnails dict
    @param data: object passed to traverse_obj()
    @param path_list: path(s) to the level that contains the final key;
                      when none is given, a single empty path is used
    @param final_key: (keyword) name of the key that holds the thumbnail
                      items, default 'thumbnails'
    @return: the traverse_obj() result for the collected
             {'url', 'height', 'width'} dicts; items that end up without
             a 'url' are mapped to None by the final filter
    """
    final_key = kw_final_key.get('final_key', 'thumbnails')

    def strip_maxres_query(u):
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if u and 'maxresdefault' in u:
            return update_url(u, query=None)
        return u

    def require_url(t):
        return t if t.get('url') else None

    # one branch per requested path, each extended to reach every item
    # found under final_key
    branches = tuple(
        variadic(path) + (final_key, Ellipsis)
        for path in path_list or [()])

    return traverse_obj(data, (branches, {
        'url': ('url', T(url_or_none), T(strip_maxres_query)),
        'height': ('height', T(int_or_none)),
        'width': ('width', T(int_or_none)),
    }, T(require_url)))
def _search_results(self, query, params):
data = {
'context': {
@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
expected_type=txt_or_none)
def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']:
if not isinstance(item, dict):
for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
if lockup_view_model:
entry = self._extract_lockup_view_model(lockup_view_model)
if entry:
yield entry
continue
renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict):
@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue
yield self._extract_video(renderer)
def _extract_lockup_view_model(self, view_model):
    """
    Turn a lockup view model dict into a playlist url_result entry

    Returns None when the model has no 'contentId', and also (after a
    warning issued with only_once=True) when its 'contentType' is neither
    LOCKUP_CONTENT_TYPE_PLAYLIST nor LOCKUP_CONTENT_TYPE_PODCAST.
    @param view_model: dict to read 'contentId', 'contentType', title
                       and thumbnail data from
    """
    playlist_id = view_model.get('contentId')
    if not playlist_id:
        return

    supported_types = ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST')
    kind = view_model.get('contentType')
    if kind not in supported_types:
        message = 'Unsupported lockup view model content type "{0}"{1}'.format(
            kind, bug_reports_message())
        self.report_warning(message, only_once=True)
        return

    # build the base result first, then overlay the metadata read from
    # the view model
    playlist_url = update_url_query(
        'https://www.youtube.com/playlist', {'list': playlist_id})
    entry = self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    title_path = (
        'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))
    image_path = (
        'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
        'thumbnailViewModel', 'image')
    metadata = {
        'title': traverse_obj(view_model, title_path),
        'thumbnails': self._extract_thumbnails(
            view_model, image_path, final_key='sources'),
    }
    return merge_dicts(entry, metadata)
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')
if video_id: