From 51f781c75ba2152642ef17d140cd1dec7ea763a9 Mon Sep 17 00:00:00 2001 From: df Date: Mon, 30 Aug 2021 19:10:02 +0100 Subject: [PATCH] Fix handling of output template fields with initial '-' --- test/test_YoutubeDL.py | 18 +++++++++++- test/test_utils.py | 6 ++-- youtube_dl/YoutubeDL.py | 62 ++++++++++++++++++++++++++++++----------- youtube_dl/utils.py | 6 +--- 4 files changed, 66 insertions(+), 26 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index a35effe0e..fe01ba660 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -631,6 +631,10 @@ class TestYoutubeDL(unittest.TestCase): 'height': 1080, 'title1': '$PATH', 'title2': '%PATH%', + 'track': '-track with initial hyphen', + 'display_id': '-initial_hyphen', + 'episode': '.episode with initial period', + 'episode_id': '.initial_period', } def fname(templ, na_placeholder='NA'): @@ -646,7 +650,9 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4') # Or by provided placeholder self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4') - self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4') + self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '_-1234.mp4') + NA_TEST_OUTTMPL = '%(uploader_date)s+%(width)d+%(id)s.%(ext)s' + self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '++1234.mp4') self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') @@ -664,6 +670,16 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') + self.assertEqual(fname('%(track)s at start changes hyphen'), + '_track with initial hyphen at start changes hyphen') + self.assertEqual(fname('medial %(track)s 
doesn\'t change hyphen'), + 'medial -track with initial hyphen doesn\'t change hyphen') + self.assertEqual(fname('%(display_id)s at start doesn\'t change hyphen'), + '-initial_hyphen at start doesn\'t change hyphen') + self.assertEqual(fname('%(episode)s at start changes period'), + '_episode with initial period at start changes period') + self.assertEqual(fname('%(episode_id)s at start doesn\'t change period'), + '.initial_period at start doesn\'t change period') def test_format_note(self): ydl = YoutubeDL() diff --git a/test/test_utils.py b/test/test_utils.py index 259c4763e..a2d14c142 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -146,10 +146,8 @@ class TestUtil(unittest.TestCase): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') - self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') - self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') - self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') - self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('__gas__dgf_'), 'gas_dgf') + self.assertEqual(sanitize_filename('__gas__dgf_', is_id=True), '__gas__dgf_') forbidden = '"\0\\/' for fc in forbidden: diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fe30758ef..c3608b812 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -634,6 +634,20 @@ class YoutubeDL(object): except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') + # As of [1] format syntax is: + # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type + # 1. 
https://docs.python.org/2/library/stdtypes.html#string-formatting + _FORMAT_RE = r'''(?x) + (?\w+)'), maxsplit=1)[1:])) + field_is_id = ( + lambda x: + (lambda y: y and is_id(y.group('field')))(re.match(FORMAT_FIELD_RE, x))) + # list formatted fragments and tags + tagged_fn = [(x % template_dict, field_is_id(x)) for x in filename] + # list the initial fragments that can be munged from [.-] to _, ie + # those that are not IDs, or are empty + mungable = itertools.takewhile(lambda x: not(x[1] and x[0]), tagged_fn) + col0 = lambda l: next(iter(zip(*l)), []) + mungable = col0(mungable) + # finally combine the munged initial part and the rest + filename = (re.sub(r'^[.-]', '_', ''.join(mungable)) + + ''.join(col0(tagged_fn[len(mungable):]))) # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e722eed58..7a071fff0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2104,15 +2104,11 @@ def sanitize_filename(s, restricted=False, is_id=False): s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) result = ''.join(map(replace_insane, s)) if not is_id: - while '__' in result: - result = result.replace('__', '_') + result = re.sub(r'_{2,}', '_', result) result = result.strip('_') # Common case of "Foreign band name - English song title" if restricted and result.startswith('-_'): result = result[2:] - if result.startswith('-'): - result = '_' + result[len('-'):] - result = result.lstrip('.') if not result: result = '_' return result