Compare commits

...

9 Commits

Author SHA1 Message Date
Nikhil Chelliah
0508e99ea0
Merge ce604ae7dce406aaf8fde8f25ee40bd390255b58 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6 2025-04-01 09:47:47 +02:00
dirkf
3eb8d22ddb
[JSInterp] Temporary fix for #33102 2025-03-31 04:21:09 +01:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
nikhil
ce604ae7dc Support _non_-tokenized source URLs too 2021-08-03 00:16:49 -04:00
nikhil
b675a6e6b9 Satisfy flake8, coding conventions, tests 2021-08-02 23:27:43 -04:00
nikhil
b511872fbf Unset ffmpeg's -seekable, -http_seekable, and -icy flags 2021-08-02 20:39:20 -04:00
nikhil
236e3d30ba More fixes 2021-08-01 18:45:06 -04:00
nikhil
e7f4793d4d [extractor/nbc] Fix NBC Olympics extractor 2021-07-29 11:31:27 -04:00
8 changed files with 101 additions and 40 deletions

View File

@ -393,6 +393,19 @@ class FFmpegFD(ExternalFD):
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10 # http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0'] args += ['-seekable', '1' if seekable else '0']
http_seekable = info_dict.get('_http_seekable')
if http_seekable is not None:
# setting -http_seekable prevents ffmpeg from guessing if the server
# supports seeking in other kinds of requests (by adding the same header
# as above: `Range: bytes=0-`)
args += ['-http_seekable', '1' if http_seekable else '0']
icy = info_dict.get('_icy')
if icy is not None:
# setting -icy 0 prevents ffmpeg from sending the header `Icy-Metadata: 1`,
# which can also cause problems
# https://github.com/ytdl-org/youtube-dl/pull/29688
# https://trac.ffmpeg.org/ticket/5460#comment:5
args += ['-icy', '1' if icy else '0']
args += self._configuration_args() args += self._configuration_args()

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -477,43 +477,94 @@ class NBCOlympicsIE(InfoExtractor):
class NBCOlympicsStreamIE(AdobePassIE): class NBCOlympicsStreamIE(AdobePassIE):
IE_NAME = 'nbcolympics:stream' IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
_TEST = { _TESTS = [
'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8', # "Tokenized" .m3u8 source URL
'info_dict': { {
'id': '203493', 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
'ext': 'mp4', 'info_dict': {
'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'id': '2019740',
'ext': 'mp4',
'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
},
'params': {
# m3u8 download
'skip_download': True,
},
}, },
'params': { # Plain .m3u8 source URL
# m3u8 download {
'skip_download': True, 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
'info_dict': {
'id': '2021729',
'ext': 'mp4',
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, },
} ]
_DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json' _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
_LEAP_URL_TEMPLATE = 'https://api-leap.nbcsports.com/feeds/assets/%s?application=NBCOlympics&platform=%s&format=nbc-player&env=staging'
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid') pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
resource = self._search_regex(
r"resource\s*=\s*'(.+)';", webpage,
'resource').replace("' + pid + '", pid)
event_config = self._download_json( event_config = self._download_json(
self._DATA_URL_TEMPLATE % ('event_config', pid), self._DATA_URL_TEMPLATE % ('event_config', pid),
pid)['eventConfig'] pid,
'Downloading event config',
)['eventConfig']
resource = event_config.get('resourceId', 'NBCOlympics')
title = self._live_title(event_config['eventTitle']) title = self._live_title(event_config['eventTitle'])
source_url = self._download_json(
self._DATA_URL_TEMPLATE % ('live_sources', pid), leap_config = self._download_json(
pid)['videoSources'][0]['sourceUrl'] self._LEAP_URL_TEMPLATE % (pid, 'desktop'),
pid,
'Downloading leap config',
)
source_url = leap_config['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl']
ap_resource = self._get_mvpd_resource(
resource,
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']),
pid,
event_config.get('ratingId', 'NO VALUE'),
)
media_token = self._extract_mvpd_auth( media_token = self._extract_mvpd_auth(
url, pid, event_config.get('requestorId', 'NBCOlympics'), resource) url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
formats = self._extract_m3u8_formats(self._download_webpage(
'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={ if event_config.get('cdnToken') is True:
'cdn': 'akamai', source_url = self._download_json(
'mediaToken': base64.b64encode(media_token.encode()), 'https://tokens.playmakerservices.com/',
'resource': base64.b64encode(resource.encode()), pid,
'url': source_url, 'Retrieving tokenized URL',
}), pid, 'mp4') data=json.dumps({
'application': 'NBCSports',
'authentication-type': 'adobe-pass',
'cdn': 'akamai',
# Indicates that the player communicates its token not via the path
# but via a cookie? NBC's player specifies `'false'`, but the field
# doesn't seem to have any effect.
# 'inPath': 'false',
'pid': pid,
'platform': 'desktop',
'requestorId': 'NBCOlympics',
'resourceId': base64.b64encode(ap_resource.encode()).decode(),
'token': base64.b64encode(media_token.encode()).decode(),
'url': source_url,
'version': 'v1',
}).encode(),
)['akamai'][0]['tokenizedUrl']
formats = self._extract_m3u8_formats(source_url, pid, 'mp4')
for f in formats:
f['_seekable'] = False
f['_http_seekable'] = False
f['_icy'] = False
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -686,6 +686,8 @@ class JSInterpreter(object):
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e) raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace): def _dump(self, obj, namespace):
if obj is JS_Undefined:
return 'undefined'
try: try:
return json.dumps(obj) return json.dumps(obj)
except TypeError: except TypeError: