mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-02-01 00:00:11 +09:00
more complete patch with subtitles
This commit is contained in:
parent
a803582717
commit
ecbd463552
@ -1879,6 +1879,8 @@ class YoutubeDL(object):
|
|||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
return
|
return
|
||||||
|
elif sub_info.get('downloader') is not None:
|
||||||
|
sub_info.get('downloader')(self, encodeFilename(sub_filename))
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
sub_data = ie._request_webpage(
|
||||||
|
@ -1635,7 +1635,7 @@ class InfoExtractor(object):
|
|||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None, note=None, errnote=None,
|
m3u8_id=None, note=None, errnote=None,
|
||||||
fatal=True, live=False, data=None, headers={},
|
fatal=True, live=False, data=None, headers={},
|
||||||
query={}):
|
query={}, include_subtitles=False):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_webpage_handle(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
note=note or 'Downloading m3u8 information',
|
note=note or 'Downloading m3u8 information',
|
||||||
@ -1650,11 +1650,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return self._parse_m3u8_formats(
|
return self._parse_m3u8_formats(
|
||||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||||
preference=preference, m3u8_id=m3u8_id, live=live)
|
preference=preference, m3u8_id=m3u8_id, live=live, include_subtitles=include_subtitles)
|
||||||
|
|
||||||
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None, live=False):
|
m3u8_id=None, live=False, include_subtitles=False):
|
||||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@ -1662,6 +1662,7 @@ class InfoExtractor(object):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
format_url = lambda u: (
|
format_url = lambda u: (
|
||||||
u
|
u
|
||||||
@ -1696,13 +1697,19 @@ class InfoExtractor(object):
|
|||||||
groups = {}
|
groups = {}
|
||||||
last_stream_inf = {}
|
last_stream_inf = {}
|
||||||
|
|
||||||
def extract_media(x_media_line):
|
def extract_media(x_media_line, include_subtitles=False):
|
||||||
media = parse_m3u8_attributes(x_media_line)
|
media = parse_m3u8_attributes(x_media_line)
|
||||||
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
|
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
|
||||||
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
|
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
|
||||||
if not (media_type and group_id and name):
|
if not (media_type and group_id and name):
|
||||||
return
|
return
|
||||||
groups.setdefault(group_id, []).append(media)
|
groups.setdefault(group_id, []).append(media)
|
||||||
|
if include_subtitles and (media_type == 'SUBTITLES'):
|
||||||
|
subtitles[media['LANGUAGE']] = [{
|
||||||
|
'url': format_url(media['URI']),
|
||||||
|
'ext': media['SUBFORMAT'],
|
||||||
|
}]
|
||||||
|
return
|
||||||
if media_type not in ('VIDEO', 'AUDIO'):
|
if media_type not in ('VIDEO', 'AUDIO'):
|
||||||
return
|
return
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
@ -1748,7 +1755,7 @@ class InfoExtractor(object):
|
|||||||
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
if line.startswith('#EXT-X-MEDIA:'):
|
if line.startswith('#EXT-X-MEDIA:'):
|
||||||
extract_media(line)
|
extract_media(line, include_subtitles=include_subtitles)
|
||||||
|
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||||
@ -1828,6 +1835,8 @@ class InfoExtractor(object):
|
|||||||
formats.append(http_f)
|
formats.append(http_f)
|
||||||
|
|
||||||
last_stream_inf = {}
|
last_stream_inf = {}
|
||||||
|
if include_subtitles:
|
||||||
|
return formats, subtitles
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
|||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
|
from ..downloader import PROTOCOL_MAP
|
||||||
|
|
||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
@ -90,17 +91,47 @@ class FranceTVIE(InfoExtractor):
|
|||||||
# Videos are identified by idDiffusion so catalogue part is optional.
|
# Videos are identified by idDiffusion so catalogue part is optional.
|
||||||
# However when provided, some extra formats may be returned so we pass
|
# However when provided, some extra formats may be returned so we pass
|
||||||
# it if available.
|
# it if available.
|
||||||
info = self._download_json(
|
|
||||||
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
|
|
||||||
video_id, 'Downloading video JSON', query={
|
|
||||||
'idDiffusion': video_id,
|
|
||||||
'catalogue': catalogue or '',
|
|
||||||
})
|
|
||||||
|
|
||||||
if info.get('status') == 'NOK':
|
info = {
|
||||||
|
'title': None,
|
||||||
|
'subtitle': None,
|
||||||
|
'image': None,
|
||||||
|
'subtitles': {},
|
||||||
|
'duration': None,
|
||||||
|
'videos': [],
|
||||||
|
'formats': [],
|
||||||
|
}
|
||||||
|
|
||||||
|
def update_info(name, value):
|
||||||
|
if (info[name] is None) and value:
|
||||||
|
info[name] = value
|
||||||
|
|
||||||
|
for device_type in ['desktop', 'mobile']:
|
||||||
|
linfo = self._download_json(
|
||||||
|
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||||
|
video_id, 'Downloading %s video JSON' % device_type, query={
|
||||||
|
'device_type': device_type,
|
||||||
|
'browser': 'chrome',
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
if linfo and linfo.get('video'):
|
||||||
|
if linfo.get('meta'):
|
||||||
|
update_info('title', linfo['meta'].get('title'))
|
||||||
|
update_info('subtitle', linfo['meta'].get('additional_title'))
|
||||||
|
update_info('image', linfo['meta'].get('image_url'))
|
||||||
|
if linfo['video'].get('url'):
|
||||||
|
if linfo['video'].get('drm'):
|
||||||
|
self._downloader.to_screen('This video source is DRM protected. Skipping')
|
||||||
|
else:
|
||||||
|
info['videos'].append(linfo['video'])
|
||||||
|
update_info('duration', linfo['video'].get('duration'))
|
||||||
|
|
||||||
|
if len(info['videos']) == 0:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s returned error: %s' % (self.IE_NAME, info['message']),
|
'No video source has been found',
|
||||||
expected=True)
|
expected=True,
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
allowed_countries = info['videos'][0].get('geoblocage')
|
allowed_countries = info['videos'][0].get('geoblocage')
|
||||||
if allowed_countries:
|
if allowed_countries:
|
||||||
georestricted = True
|
georestricted = True
|
||||||
@ -129,29 +160,7 @@ class FranceTVIE(InfoExtractor):
|
|||||||
|
|
||||||
is_live = None
|
is_live = None
|
||||||
|
|
||||||
videos = []
|
for video in info['videos']:
|
||||||
|
|
||||||
for video in (info.get('videos') or []):
|
|
||||||
if video.get('statut') != 'ONLINE':
|
|
||||||
continue
|
|
||||||
if not video.get('url'):
|
|
||||||
continue
|
|
||||||
videos.append(video)
|
|
||||||
|
|
||||||
if not videos:
|
|
||||||
for device_type in ['desktop', 'mobile']:
|
|
||||||
fallback_info = self._download_json(
|
|
||||||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
|
||||||
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
|
||||||
'device_type': device_type,
|
|
||||||
'browser': 'chrome',
|
|
||||||
}, fatal=False)
|
|
||||||
|
|
||||||
if fallback_info and fallback_info.get('video'):
|
|
||||||
videos.append(fallback_info['video'])
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for video in videos:
|
|
||||||
video_url = video.get('url')
|
video_url = video.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
@ -167,56 +176,56 @@ class FranceTVIE(InfoExtractor):
|
|||||||
# See https://github.com/ytdl-org/youtube-dl/issues/3963
|
# See https://github.com/ytdl-org/youtube-dl/issues/3963
|
||||||
# m3u8 urls work fine
|
# m3u8 urls work fine
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_f4m_formats(
|
info['formats'].extend(self._extract_f4m_formats(
|
||||||
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||||
video_id, f4m_id=format_id, fatal=False))
|
video_id, f4m_id=format_id, fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
format, subtitle = self._extract_m3u8_formats(
|
||||||
sign(video_url, format_id), video_id, 'mp4',
|
sign(video_url, format_id), video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||||
fatal=False))
|
fatal=False, include_subtitles=True)
|
||||||
|
info['formats'].extend(format)
|
||||||
|
for lang in subtitle:
|
||||||
|
if lang in info['subtitles']:
|
||||||
|
info['subtitles'][lang].extend(subtitle[lang])
|
||||||
|
else:
|
||||||
|
info['subtitles'][lang] = subtitle[lang]
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
info['formats'].extend(self._extract_mpd_formats(
|
||||||
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
formats.append({
|
info['formats'].append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': 'rtmp-%s' % format_id,
|
'format_id': 'rtmp-%s' % format_id,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
if self._is_valid_url(video_url, video_id, format_id):
|
if self._is_valid_url(video_url, video_id, format_id):
|
||||||
formats.append({
|
info['formats'].append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(info['formats'])
|
||||||
|
|
||||||
title = info['titre']
|
if info['subtitle']:
|
||||||
subtitle = info.get('sous_titre')
|
info['title'] += ' - %s' % info['subtitle']
|
||||||
if subtitle:
|
info['title'] = info['title'].strip()
|
||||||
title += ' - %s' % subtitle
|
|
||||||
title = title.strip()
|
|
||||||
|
|
||||||
subtitles = {}
|
for lang, sts in info['subtitles'].items():
|
||||||
subtitles_list = [{
|
for st in sts:
|
||||||
'url': subformat['url'],
|
st['downloader'] = lambda ydl, filename: PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, st)
|
||||||
'ext': subformat.get('format'),
|
|
||||||
} for subformat in info.get('subtitles', []) if subformat.get('url')]
|
|
||||||
if subtitles_list:
|
|
||||||
subtitles['fr'] = subtitles_list
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(info['title']) if is_live else info['title'],
|
||||||
'description': clean_html(info.get('synopsis')),
|
'description': clean_html(info.get('synopsis')),
|
||||||
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
'thumbnail': info.get('image'),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': info['formats'],
|
||||||
'subtitles': subtitles,
|
'subtitles': info['subtitles'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
Loading…
Reference in New Issue
Block a user