Add an option to discard style information from TTML subtitles

This commit is contained in:
filip-hejsek 2022-08-11 05:28:03 +02:00 committed by GitHub
parent e6a836d54c
commit 74bbdc079b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 5 deletions

View File

@@ -280,6 +280,7 @@ def _real_main(argv=None):
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
'ttml_convert_style': opts.ttmlconvertstyle,
})
if opts.embedsubtitles:
postprocessors.append({

View File

@@ -862,6 +862,10 @@ def parseOpts(overrideArguments=None):
'--convert-subs', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
postproc.add_option(
'--ttml-convert-no-style',
action='store_false', dest='ttmlconvertstyle', default=True,
help='Discard all style information when converting subtitles from ttml/dfxp')
parser.add_option_group(general)
parser.add_option_group(network)

View File

@@ -599,9 +599,10 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
def __init__(self, downloader=None, format=None, ttml_convert_style=True):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
self.format = format
self.ttml_convert_style = ttml_convert_style
def run(self, info):
subs = info.get('requested_subtitles')
@@ -634,7 +635,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
srt_data = dfxp2srt(f.read(), self.ttml_convert_style)
with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)

View File

@@ -4494,7 +4494,7 @@ def srt_subtitles_timecode(seconds):
return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
def dfxp2srt(dfxp_data):
def dfxp2srt(dfxp_data, convert_style=True):
'''
@param dfxp_data A bytes-like object containing DFXP data
@returns A unicode object containing converted SRT data
@@ -4536,7 +4536,7 @@ def dfxp2srt(dfxp_data):
def start(self, tag, attrib):
if tag in (_x('ttml:br'), 'br'):
self._out += '\n'
else:
elif convert_style:
unclosed_elements = []
style = {}
element_style_id = attrib.get('style')
@@ -4579,7 +4579,7 @@ def dfxp2srt(dfxp_data):
self._unclosed_elements.append(unclosed_elements)
def end(self, tag):
if tag not in (_x('ttml:br'), 'br'):
if convert_style and tag not in (_x('ttml:br'), 'br'):
unclosed_elements = self._unclosed_elements.pop()
for element in reversed(unclosed_elements):
self._out += '</%s>' % element