more complete patch with subtitles

This commit is contained in:
Sacha Arnoud 2021-09-19 03:03:31 +00:00
parent a803582717
commit ecbd463552
3 changed files with 82 additions and 62 deletions

View File

@ -1879,6 +1879,8 @@ class YoutubeDL(object):
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
elif sub_info.get('downloader') is not None:
sub_info.get('downloader')(self, encodeFilename(sub_filename))
else:
try:
sub_data = ie._request_webpage(

View File

@ -1635,7 +1635,7 @@ class InfoExtractor(object):
entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None,
fatal=True, live=False, data=None, headers={},
query={}):
query={}, include_subtitles=False):
res = self._download_webpage_handle(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
@ -1650,11 +1650,11 @@ class InfoExtractor(object):
return self._parse_m3u8_formats(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
preference=preference, m3u8_id=m3u8_id, live=live)
preference=preference, m3u8_id=m3u8_id, live=live, include_subtitles=include_subtitles)
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, live=False):
m3u8_id=None, live=False, include_subtitles=False):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []
@ -1662,6 +1662,7 @@ class InfoExtractor(object):
return []
formats = []
subtitles = {}
format_url = lambda u: (
u
@ -1696,13 +1697,19 @@ class InfoExtractor(object):
groups = {}
last_stream_inf = {}
def extract_media(x_media_line):
def extract_media(x_media_line, include_subtitles=False):
media = parse_m3u8_attributes(x_media_line)
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
if not (media_type and group_id and name):
return
groups.setdefault(group_id, []).append(media)
if include_subtitles and (media_type == 'SUBTITLES'):
subtitles[media['LANGUAGE']] = [{
'url': format_url(media['URI']),
'ext': media['SUBFORMAT'],
}]
return
if media_type not in ('VIDEO', 'AUDIO'):
return
media_url = media.get('URI')
@ -1748,7 +1755,7 @@ class InfoExtractor(object):
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-MEDIA:'):
extract_media(line)
extract_media(line, include_subtitles=include_subtitles)
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
@ -1828,6 +1835,8 @@ class InfoExtractor(object):
formats.append(http_f)
last_stream_inf = {}
if include_subtitles:
return formats, subtitles
return formats
@staticmethod

View File

@ -20,6 +20,7 @@ from ..utils import (
urljoin,
)
from .dailymotion import DailymotionIE
from ..downloader import PROTOCOL_MAP
class FranceTVBaseInfoExtractor(InfoExtractor):
@ -90,17 +91,47 @@ class FranceTVIE(InfoExtractor):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
info = self._download_json(
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
video_id, 'Downloading video JSON', query={
'idDiffusion': video_id,
'catalogue': catalogue or '',
})
if info.get('status') == 'NOK':
info = {
'title': None,
'subtitle': None,
'image': None,
'subtitles': {},
'duration': None,
'videos': [],
'formats': [],
}
def update_info(name, value):
if (info[name] is None) and value:
info[name] = value
for device_type in ['desktop', 'mobile']:
linfo = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading %s video JSON' % device_type, query={
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
if linfo and linfo.get('video'):
if linfo.get('meta'):
update_info('title', linfo['meta'].get('title'))
update_info('subtitle', linfo['meta'].get('additional_title'))
update_info('image', linfo['meta'].get('image_url'))
if linfo['video'].get('url'):
if linfo['video'].get('drm'):
self._downloader.to_screen('This video source is DRM protected. Skipping')
else:
info['videos'].append(linfo['video'])
update_info('duration', linfo['video'].get('duration'))
if len(info['videos']) == 0:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, info['message']),
expected=True)
'No video source has been found',
expected=True,
video_id=video_id)
allowed_countries = info['videos'][0].get('geoblocage')
if allowed_countries:
georestricted = True
@ -129,29 +160,7 @@ class FranceTVIE(InfoExtractor):
is_live = None
videos = []
for video in (info.get('videos') or []):
if video.get('statut') != 'ONLINE':
continue
if not video.get('url'):
continue
videos.append(video)
if not videos:
for device_type in ['desktop', 'mobile']:
fallback_info = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading fallback %s video JSON' % device_type, query={
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
if fallback_info and fallback_info.get('video'):
videos.append(fallback_info['video'])
formats = []
for video in videos:
for video in info['videos']:
video_url = video.get('url')
if not video_url:
continue
@ -167,56 +176,56 @@ class FranceTVIE(InfoExtractor):
# See https://github.com/ytdl-org/youtube-dl/issues/3963
# m3u8 urls work fine
continue
formats.extend(self._extract_f4m_formats(
info['formats'].extend(self._extract_f4m_formats(
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format, subtitle = self._extract_m3u8_formats(
sign(video_url, format_id), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False))
fatal=False, include_subtitles=True)
info['formats'].extend(format)
for lang in subtitle:
if lang in info['subtitles']:
info['subtitles'][lang].extend(subtitle[lang])
else:
info['subtitles'][lang] = subtitle[lang]
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
info['formats'].extend(self._extract_mpd_formats(
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
elif video_url.startswith('rtmp'):
formats.append({
info['formats'].append({
'url': video_url,
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
})
else:
if self._is_valid_url(video_url, video_id, format_id):
formats.append({
info['formats'].append({
'url': video_url,
'format_id': format_id,
})
self._sort_formats(formats)
self._sort_formats(info['formats'])
title = info['titre']
subtitle = info.get('sous_titre')
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
subtitles = {}
subtitles_list = [{
'url': subformat['url'],
'ext': subformat.get('format'),
} for subformat in info.get('subtitles', []) if subformat.get('url')]
if subtitles_list:
subtitles['fr'] = subtitles_list
if info['subtitle']:
info['title'] += ' - %s' % info['subtitle']
info['title'] = info['title'].strip()
for lang, sts in info['subtitles'].items():
for st in sts:
st['downloader'] = lambda ydl, filename: PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, st)
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'title': self._live_title(info['title']) if is_live else info['title'],
'description': clean_html(info.get('synopsis')),
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'thumbnail': info.get('image'),
'duration': int_or_none(info.get('duration')),
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
'formats': info['formats'],
'subtitles': info['subtitles'],
}
def _real_extract(self, url):