From 3d649843fe97e6d9ceed91c4cf0f4f4eb07958f8 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 13 Dec 2024 03:09:29 +0000 Subject: [PATCH] [YouTube] Simplify pattern for nsig function name extraction --- test/test_youtube_signature.py | 4 +++ youtube_dl/extractor/youtube.py | 46 ++++++++++++++------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 1c2ba848e..1d6bce67c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -206,6 +206,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', ), + ( + 'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js', + 'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 55881f4d1..f59d1e36a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1665,37 +1665,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = self._search_regex( + # (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}}; # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; - # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) - # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) - # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) + # or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c) + # or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c) + # or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b) # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") - # old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P[a-z])\s*=\s*[a-z]\s* + # old: (b=a.get("n"))&&(b=narray[idx](b)(?P[a-z])\s*=\s*[a-z]\s* # older: (b=a.get("n"))&&(b=nfunc(b) r'''(?x) - \((?:[\w$()\s]+,)*?\s* # ( - (?P<b>[a-zA-Z])\s*=\s* # b=, R=
- (?: - (?: # expect ,c=a.get(b) (etc) - String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| - "n+"\[\s*\+?s*[\w$.]+\s*] - )\s*(?:,[\w$()\s]+(?=,))*| - (?P<old>[\w$]+) # a (old[er]) - )\s* - (?(old) - # b.get("n") - (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? - (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\)) - | # ,c=a.get(b) - ,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s* - (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? - (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\)) - ) - # interstitial junk - \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)? - (?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]= - # nfunc|nfunc[idx] - (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) + # (expr, ..., + \((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)* + # b=... + (?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?: + \.\s*[\w$]+ | + \[\s*[\w$]+\s*\] | + \.\s*get\s*\(\s*[\w$"]+\s*\) + )\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s* + \)\s*&&\s*\( # ...)&&( + # b = nfunc, b = narray[idx] + (?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s* + (?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)? + # (...) + \(\s*[\w$]+\s*\) ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), default=(None, None)) # thx bashonly: yt-dlp/yt-dlp/pull/10611