mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-23 04:30:10 +09:00
Compare commits
6 Commits
76fe4ba3b2
...
90a271e914
Author | SHA1 | Date | |
---|---|---|---|
|
90a271e914 | ||
|
172d270607 | ||
|
22feed08a1 | ||
|
942b8ca3be | ||
|
3729c52f9d | ||
|
71679eaee8 |
@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
|
@ -2605,6 +2605,13 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
signed = 'hdnea=' in manifest_url
|
||||
if not signed:
|
||||
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
|
||||
manifest_url = re.sub(
|
||||
r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
|
||||
'', manifest_url).strip('?')
|
||||
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
@ -2630,7 +2637,7 @@ class InfoExtractor(object):
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
|
||||
if http_host and m3u8_formats and not signed:
|
||||
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
|
@ -679,7 +679,14 @@ from .nationalgeographic import (
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nba import (
|
||||
NBAWatchEmbedIE,
|
||||
NBAWatchIE,
|
||||
NBAWatchCollectionIE,
|
||||
NBAEmbedIE,
|
||||
NBAIE,
|
||||
NBAChannelIE,
|
||||
)
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
|
@ -35,6 +35,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@ -217,6 +218,33 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
},
|
||||
# RSS feed with item with description and thumbnails
|
||||
{
|
||||
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'info_dict': {
|
||||
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'title': 're:.*100% Hydrogen.*',
|
||||
'description': 're:.*In this episode.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'm4a',
|
||||
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||
'title': 're:Hydrogen!',
|
||||
'description': 're:.*In this episode we are going.*',
|
||||
'timestamp': 1567977776,
|
||||
'upload_date': '20190908',
|
||||
'duration': 459,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosures and unsupported link URLs
|
||||
{
|
||||
'url': 'http://www.hellointernet.fm/podcast?format=rss',
|
||||
@ -2218,10 +2246,10 @@ class GenericIE(InfoExtractor):
|
||||
default=None)
|
||||
|
||||
duration = itunes('duration')
|
||||
explicit = itunes('explicit')
|
||||
if explicit == 'true':
|
||||
explicit = (itunes('explicit') or '').lower()
|
||||
if explicit in ('true', 'yes'):
|
||||
age_limit = 18
|
||||
elif explicit == 'false':
|
||||
elif explicit in ('false', 'no'):
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
@ -2234,7 +2262,7 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': unified_timestamp(
|
||||
xpath_text(it, 'pubDate', default=None)),
|
||||
'duration': int_or_none(duration) or parse_duration(duration),
|
||||
'thumbnail': url_or_none(itunes('image')),
|
||||
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
|
||||
'episode': itunes('title'),
|
||||
'episode_number': int_or_none(itunes('episode')),
|
||||
'season_number': int_or_none(itunes('season')),
|
||||
|
@ -5,33 +5,137 @@ import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
remove_start,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
class NBACVPBaseIE(TurnerBaseIE):
|
||||
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||
return self._extract_cvp_info(
|
||||
'http://secure.nba.com/%s' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
}, fatal=fatal)
|
||||
|
||||
|
||||
class NBAWatchBaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
|
||||
|
||||
def _extract_video(self, filter_key, filter_value):
|
||||
video = self._download_json(
|
||||
'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
|
||||
filter_value, query={
|
||||
'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
|
||||
'q': filter_key + ':' + filter_value,
|
||||
'wt': 'json',
|
||||
})['response']['docs'][0]
|
||||
|
||||
video_id = str(video['pid'])
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
m3u8_url = (self._download_json(
|
||||
'https://watch.nba.com/service/publishpoint', video_id, query={
|
||||
'type': 'video',
|
||||
'format': 'json',
|
||||
'id': video_id,
|
||||
}, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
|
||||
}, fatal=False) or {}).get('path')
|
||||
if m3u8_url:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
for f in m3u8_formats:
|
||||
http_f = f.copy()
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': http_f['url'].replace('.m3u8', ''),
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('runtime')),
|
||||
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||
'tags': video.get('tags'),
|
||||
}
|
||||
|
||||
seo_name = video.get('seoName')
|
||||
if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
|
||||
base_path = ''
|
||||
if seo_name.startswith('teams/'):
|
||||
base_path += seo_name.split('/')[1] + '/'
|
||||
base_path += 'video/'
|
||||
cvp_info = self._extract_nba_cvp_info(
|
||||
base_path + seo_name + '.xml', video_id, False)
|
||||
if cvp_info:
|
||||
formats.extend(cvp_info['formats'])
|
||||
info = merge_dicts(info, cvp_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
IENAME = 'nba:watch:embed'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.nba.com/embed?id=659395',
|
||||
'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
|
||||
'info_dict': {
|
||||
'id': '659395',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'timestamp': 1492228800,
|
||||
'upload_date': '20170415',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video('pid', video_id)
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'id': '70946',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'timestamp': 1354597200,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'id': '330865',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Download the whole playlist takes too long time
|
||||
'playlist_items': '1-30',
|
||||
'timestamp': 1432094400,
|
||||
'upload_date': '20150521',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only CVP mp4 format available
|
||||
'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
|
||||
if collection_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
|
||||
return self.url_result(
|
||||
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||
return self._extract_video('seoName', display_id)
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
|
||||
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
|
||||
'type': 'teamvideo',
|
||||
'start': page * self._PAGE_SIZE + 1,
|
||||
'npp': (page + 1) * self._PAGE_SIZE + 1,
|
||||
'sort': 'recent',
|
||||
'output': 'json',
|
||||
'site': team,
|
||||
})
|
||||
results = self._download_json(
|
||||
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
|
||||
for item in results:
|
||||
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
||||
team = orig_path.split('/')[0]
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.nba.com/list/collection/season-preview-2020',
|
||||
'info_dict': {
|
||||
'id': 'season-preview-2020',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video because of --no-playlist')
|
||||
video_path = self._search_regex(
|
||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
||||
return self.url_result(video_url)
|
||||
|
||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
def _fetch_page(self, collection_id, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
|
||||
collection_id, 'Downloading page %d JSON metadata' % page, query={
|
||||
'count': self._PAGE_SIZE,
|
||||
'page': page,
|
||||
})['results']['videos']
|
||||
for video in videos:
|
||||
program = video.get('program') or {}
|
||||
seo_name = program.get('seoName') or program.get('slug')
|
||||
if not seo_name:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': program.get('id'),
|
||||
'title': program.get('title') or video.get('title'),
|
||||
'url': 'https://www.nba.com/watch/video/' + seo_name,
|
||||
'thumbnail': video.get('image'),
|
||||
'description': program.get('description') or video.get('description'),
|
||||
'duration': parse_duration(program.get('runtimeHours')),
|
||||
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
collection_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, collection_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, collection_id)
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
class NBABaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'''(?x)
|
||||
https?://(?:www\.)?nba\.com/
|
||||
(?P<team>
|
||||
blazers|
|
||||
bucks|
|
||||
bulls|
|
||||
cavaliers|
|
||||
celtics|
|
||||
clippers|
|
||||
grizzlies|
|
||||
hawks|
|
||||
heat|
|
||||
hornets|
|
||||
jazz|
|
||||
kings|
|
||||
knicks|
|
||||
lakers|
|
||||
magic|
|
||||
mavericks|
|
||||
nets|
|
||||
nuggets|
|
||||
pacers|
|
||||
pelicans|
|
||||
pistons|
|
||||
raptors|
|
||||
rockets|
|
||||
sixers|
|
||||
spurs|
|
||||
suns|
|
||||
thunder|
|
||||
timberwolves|
|
||||
warriors|
|
||||
wizards
|
||||
)
|
||||
(?:/play\#)?/'''
|
||||
_CHANNEL_PATH_REGEX = r'video/channel|series'
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
def _embed_url_result(self, team, content_id):
|
||||
return self.url_result(update_url_query(
|
||||
'https://secure.nba.com/assets/amp/include/video/iframe.html', {
|
||||
'contentId': content_id,
|
||||
'team': team,
|
||||
}), NBAEmbedIE.ie_key())
|
||||
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
def _call_api(self, team, content_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
|
||||
content_id, 'Download %s JSON metadata' % resource,
|
||||
query=query, headers={
|
||||
'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
|
||||
})['response']['result']
|
||||
|
||||
def _extract_video(self, video, team, extract_all=True):
|
||||
video_id = compat_str(video['nid'])
|
||||
team = video['brand']
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video.get('title') or video.get('headline') or video['shortHeadline'],
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('published')),
|
||||
}
|
||||
|
||||
subtitles = {}
|
||||
captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
|
||||
for caption_url in captions.values():
|
||||
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||
|
||||
formats = []
|
||||
mp4_url = video.get('mp4')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
if extract_all:
|
||||
source_url = video.get('videoSource')
|
||||
if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
|
||||
formats.append({
|
||||
'format_id': 'source',
|
||||
'url': source_url,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
m3u8_url = video.get('m3u8')
|
||||
if m3u8_url:
|
||||
if '.akamaihd.net/i/' in m3u8_url:
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
content_xml = video.get('contentXml')
|
||||
if team and content_xml:
|
||||
cvp_info = self._extract_nba_cvp_info(
|
||||
team + content_xml, video_id, fatal=False)
|
||||
if cvp_info:
|
||||
formats.extend(cvp_info['formats'])
|
||||
subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
|
||||
info = merge_dicts(info, cvp_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
info.update(self._embed_url_result(team, video['videoId']))
|
||||
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
team, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if '/play#/' in url:
|
||||
display_id = compat_urllib_parse_unquote(display_id)
|
||||
else:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
display_id = self._search_regex(
|
||||
self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
|
||||
return self._extract_url_results(team, display_id)
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
|
||||
IENAME = 'nba:embed'
|
||||
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
content_id = qs['contentId'][0]
|
||||
team = qs.get('team', [None])[0]
|
||||
if not team:
|
||||
return self.url_result(
|
||||
'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
|
||||
video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
|
||||
return self._extract_video(video, team)
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
|
||||
IENAME = 'nba'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
|
||||
'info_dict': {
|
||||
'id': '45039',
|
||||
'ext': 'mp4',
|
||||
'title': 'AND WE BACK.',
|
||||
'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
|
||||
'duration': 94,
|
||||
'timestamp': 1607112000,
|
||||
'upload_date': '20201218',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CONTENT_ID_REGEX = r'videoID'
|
||||
|
||||
def _extract_url_results(self, team, content_id):
|
||||
return self._embed_url_result(team, content_id)
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
|
||||
IENAME = 'nba:channel'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nba.com/blazers/video/channel/summer_league',
|
||||
'info_dict': {
|
||||
'title': 'Summer League',
|
||||
},
|
||||
'playlist_mincount': 138,
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CONTENT_ID_REGEX = r'videoSubCategory'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, team, channel, page):
|
||||
results = self._call_api(team, channel, {
|
||||
'channels': channel,
|
||||
'count': self._PAGE_SIZE,
|
||||
'offset': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in results:
|
||||
yield self._extract_video(video, team, False)
|
||||
|
||||
def _extract_url_results(self, team, content_id):
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, content_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, playlist_title=content_id)
|
||||
|
@ -33,8 +33,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(
|
||||
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
|
||||
return self._extract_akamai_formats(asset_url, video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
|
@ -6,6 +6,7 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@ -49,8 +50,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
|
||||
return video_url + '?hdnea=' + token
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
||||
video_data = self._download_xml(data_src, video_id)
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
|
||||
video_data = self._download_xml(
|
||||
data_src, video_id,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=fatal)
|
||||
if not video_data:
|
||||
return {}
|
||||
video_id = video_data.attrib['id']
|
||||
title = xpath_text(video_data, 'headline', fatal=True)
|
||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
@ -63,12 +69,14 @@ class TurnerBaseIE(AdobePassIE):
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
rex = re.compile(
|
||||
r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
|
||||
# Possible formats locations: files/file, files/groupFiles/files
|
||||
# and maybe others
|
||||
for video_file in video_data.findall('.//file'):
|
||||
video_url = video_file.text.strip()
|
||||
video_url = url_or_none(video_file.text.strip())
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
@ -108,9 +116,28 @@ class TurnerBaseIE(AdobePassIE):
|
||||
continue
|
||||
urls.append(video_url)
|
||||
format_id = video_file.get('bitrate')
|
||||
if ext == 'smil':
|
||||
if ext in ('scc', 'srt', 'vtt'):
|
||||
subtitles.setdefault('en', []).append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'png':
|
||||
thumbnails.append({
|
||||
'id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
video_url, video_id, {
|
||||
'hds': path_data.get('f4m', {}).get('host'),
|
||||
# nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
|
||||
# ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
|
||||
# ssl.cdn.turner.com
|
||||
'http': 'pmd.cdn.turner.com',
|
||||
}))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
@ -129,7 +156,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
}
|
||||
mobj = rex.search(format_id + video_url)
|
||||
mobj = rex.search(video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group('width')),
|
||||
@ -152,7 +179,6 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
@ -168,12 +194,12 @@ class TurnerBaseIE(AdobePassIE):
|
||||
}.get(source.get('format'))
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('cut'),
|
||||
thumbnails.extend({
|
||||
'id': image.get('cut') or image.get('name'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.findall('images/image')]
|
||||
} for image in video_data.findall('images/image'))
|
||||
|
||||
is_live = xpath_text(video_data, 'isLive') == 'true'
|
||||
|
||||
|
@ -300,6 +300,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||
video_id)
|
||||
|
||||
def _extract_ytcfg(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), video_id, fatal=False)
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
@ -2283,16 +2289,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# annotations
|
||||
video_annotations = None
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
xsrf_token = None
|
||||
ytcfg = self._extract_ytcfg(video_id, video_webpage)
|
||||
if ytcfg:
|
||||
xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
|
||||
if not xsrf_token:
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
invideo_url = try_get(
|
||||
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
|
||||
if xsrf_token and invideo_url:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
xsrf_field_name = None
|
||||
if ytcfg:
|
||||
xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
|
||||
if not xsrf_field_name:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
video_annotations = self._download_webpage(
|
||||
self._proto_relative_url(invideo_url),
|
||||
video_id, note='Downloading annotations',
|
||||
@ -3130,10 +3145,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
playlist_title=title)
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), item_id, fatal=False)
|
||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
|
Loading…
Reference in New Issue
Block a user