2025-07-14 23:44:14 +09:00
2 changed files with 5 additions and 34 deletions
--- a/test/test_compat.py
+++ b/test/test_compat.py
@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 from youtube_dl.compat import (
-    compat_casefold,
    compat_getenv,
    compat_setenv,
    compat_etree_Element,
@ -119,21 +118,9 @@ class TestCompat(unittest.TestCase):
 <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
        compat_etree_fromstring(xml)

-    def test_compat_struct_unpack(self):
+    def test_struct_unpack(self):
        self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

-    def test_compat_casefold(self):
-        if hasattr(compat_str, 'casefold'):
-            # don't bother to test str.casefold() (again)
-            return
-        # thanks https://bugs.python.org/file24232/casefolding.patch
-        self.assertEqual(compat_casefold('hello'), 'hello')
-        self.assertEqual(compat_casefold('hELlo'), 'hello')
-        self.assertEqual(compat_casefold('ß'), 'ss')
-        self.assertEqual(compat_casefold('ﬁ'), 'fi')
-        self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
-        self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
-

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/casefold.py
+++ b/youtube_dl/casefold.py
@ -1,20 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from .compat import (
-    compat_str,
-    compat_chr,
-)
+from .compat import compat_str

-# Below is included the text of icu/CaseFolding.txt retrieved from
-# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
-# In case newly foldable Unicode characters are defined, paste the new version
-# of the text inside the ''' marks.
-# The text is expected to have only blank lines andlines with 1st character #,
-# all ignored, and fold definitions like this:
-# `from_hex_code; space_separated_to_hex_code_list; comment`
-
-_map_str = '''
 # CaseFolding-15.0.0.txt
 # Date: 2022-02-02, 23:35:35 GMT
 # © 2022 Unicode®, Inc.
@ -77,6 +65,7 @@ _map_str = '''
 #  have the value C for the status field, and the code point itself for the mapping field.

 # =================================================================
+_map_str = '''
 0041; C; 0061; # LATIN CAPITAL LETTER A
 0042; C; 0062; # LATIN CAPITAL LETTER B
 0043; C; 0063; # LATIN CAPITAL LETTER C
@ -1638,22 +1627,30 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
 '''

-_parse_unichr = lambda s: compat_chr(int(s, 16))
+# Code point -> character on both Python 2 (unichr) and Python 3 (chr);
+# the bare `unichr` builtin does not exist on Python 3
+try:
+    _unichr = unichr
+except NameError:
+    _unichr = chr

+# Data lines look like `from_hex; status; space_separated_to_hex_list; # comment`;
+# blank lines and lines starting with # are skipped, only status C or F is kept
 _map = dict(
-    (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' '))))
+    (_unichr(int(from_, 16)), ''.join(_unichr(int(v, 16)) for v in to_.split(' ')))
    for from_, type_, to_, _ in (
-        l.split('; ', 3) for l in _map_str.splitlines() if l and not l[0] == '#')
+        l.split('; ', 3) for l in _map_str.splitlines() if l and not l.startswith('#'))
    if type_ in ('C', 'F'))
 del _map_str


 def casefold(s):
+    """Return s with each character replaced by its folding from _map, if any"""
    assert isinstance(s, compat_str)
    return ''.join((_map.get(c, c) for c in s))


 __all__ = [
-    casefold
+    'casefold',
 ]