From b4469a0f652c450a81901795a8f522ae5457a1cf Mon Sep 17 00:00:00 2001
From: dirkf <fieldhouse@gmx.net>
Date: Sat, 7 Dec 2024 03:39:44 +0000
Subject: [PATCH] [YouTube] Handle player `3bb1f723` * fix signature code
 extraction * raise if n function returns input value * add new tests from
 yt-dlp

Co-authored-by: bashonly
---
 test/test_youtube_signature.py  | 37 +++++++++++++++++++++++++-------
 youtube_dl/extractor/youtube.py | 38 ++++++++++++++++++++++-----------
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 56e92fac5..7d1ff90ba 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -12,6 +12,7 @@ import re
 import string
 
 from youtube_dl.compat import (
+    compat_contextlib_suppress,
     compat_open as open,
     compat_str,
     compat_urlretrieve,
@@ -50,23 +51,33 @@ _SIG_TESTS = [
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
         84,
-        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
     ),
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
         83,
-        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
     ),
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
-        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
     ),
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
-    )
+    ),
+    (
+        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+    ),
+    (
+        'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+    ),
 ]
 
 _NSIG_TESTS = [
@@ -142,6 +153,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
         'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
     ),
+    (
+        'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
+        'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
+    ),
     (
         'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
         'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
@@ -154,6 +169,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
         'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
     ),
+    (
+        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
+        '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
+    ),
     (
         'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
         '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
@@ -182,6 +201,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
         'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
     ),
+    (
+        'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
+        'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
+    ),
 ]
 
 
@@ -216,11 +239,9 @@ class TestSignature(unittest.TestCase):
             os.mkdir(self.TESTDATA_DIR)
 
     def tearDown(self):
-        try:
+        with compat_contextlib_suppress(OSError):
             for f in os.listdir(self.TESTDATA_DIR):
                 os.remove(f)
-        except OSError:
-            pass
 
 
 def t_factory(name, sig_func, url_pattern):
@@ -258,7 +279,7 @@ def n_sig(jscode, sig_input):
 
 
 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+    'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
 for test_spec in _SIG_TESTS:
     make_sig_test(*test_spec)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6fe520e9a..d633032ae 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         self.to_screen('Extracted signature function:\n' + code)
 
     def _parse_sig_js(self, jscode):
+        # Examples where `sig` is funcname:
+        # sig=function(a){a=a.split(""); ... ;return a.join("")};
+        # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
+        # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
+        # sig=function(J){J=J.split(""); ... ;return J.join("")};
+        # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
+        # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
         funcname = self._search_regex(
-            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+            (r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
+             # Old patterns
+             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
-             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
-             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
              r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
             jscode, 'Initial JS player signature function name', group='sig')
 
@@ -1658,6 +1665,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
+            # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
             # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
             # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
@@ -1666,7 +1674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # older: (b=a.get("n"))&&(b=nfunc(b)
             r'''(?x)
                 \((?:[\w$()\s]+,)*?\s*      # (
-                (?P<b>[a-z])\s*=\s*         # b=
+                (?P<b>[a-zA-Z])\s*=\s*      # b=, R=
                 (?:
                     (?:                     # expect ,c=a.get(b) (etc)
                         String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
@@ -1679,7 +1687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                        (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
                        |                    # ,c=a.get(b)
-                       ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
+                       ,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s*
                        (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                        (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
                    )
@@ -1697,15 +1705,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 r'''(?xs)
                     (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
                     (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
-                    \s*\{(?:(?!};).)+?["']enhanced_except_
+                    \s*\{(?:(?!};).)+?(?:
+                        ["']enhanced_except_ |
+                        return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+
+                    )
                 ''', jscode, 'Initial JS player n function name', group='name')
         if not idx:
             return func_name
 
-        return self._parse_json(self._search_regex(
-            r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
-            'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
-            func_name, transform_source=js_to_json)[int(idx)]
+        return self._search_json(
+            r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
+            'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
+            func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
+            transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function_code(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)
@@ -1734,7 +1746,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             except Exception as e:
                 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
 
-            if ret.startswith('enhanced_except_'):
+            if ret.startswith('enhanced_except_') or ret.endswith(s):
                 raise JSInterpreter.Exception('Signature function returned an exception')
             return ret