Compare commits

..

8 Commits

Author SHA1 Message Date
dirkf
87ba614c4c
Merge 3d649843fe into c5098961b0 2024-12-13 03:15:44 +00:00
dirkf
3d649843fe [YouTube] Simplify pattern for nsig function name extraction 2024-12-13 03:15:05 +00:00
dirkf
9bc8fcf23c [YouTube] Pass nsig value as return hook, fixes player 3bb1f723 2024-12-13 03:15:05 +00:00
dirkf
a93fff3052 [YouTube] Handle player 3bb1f723
* fix signature code extraction
* raise if n function returns input value
* add new tests from yt-dlp

Co-authored-by: bashonly
2024-12-13 03:15:05 +00:00
dirkf
2968201ae0 [jsinterp] Add return hook for player 3bb1f723
* set var `_ytdl_do_not_return` to a specific value in the scope of a function
* if an expression to be returned has that value, `return` becomes `void`
2024-12-13 03:15:05 +00:00
dirkf
067ad7439c [jsinterp] Strip /* comments */ when parsing
* NB: _separate() is looking creaky
2024-12-13 03:15:05 +00:00
dirkf
7ab85d88d6 [jsinterp] Fix and improve "methods"
* push, unshift return new length
* improve edge cases for push/pop, shift/unshift, forEach, indexOf, charCodeAt
* increase test coverage
2024-12-13 03:15:04 +00:00
dirkf
4bf85ca5ba [jsinterp] Fix and improve split/join
* improve split/join edge cases
* correctly implement regex split (not like re.split)
2024-12-13 03:15:04 +00:00
3 changed files with 27 additions and 30 deletions

View File

@ -516,10 +516,11 @@ class TestJSInterpreter(unittest.TestCase):
['t', 'e', 's', 't']) ['t', 'e', 's', 't'])
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}', self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
['t', 't']) ['t', 't'])
# from MDN: surrogate pairs aren't handled: case 1 fails, and case 2 beats Py3.2 # from MDN: surrogate pairs aren't handled: case 1 fails
# self._test('function f(){return "😄😄".split(/(?:)/)}',
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
# case 2 beats Py3.2: it gets the case 1 result
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)): if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
# self._test('function f(){return "😄😄".split(/(?:)/)}',
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
self._test('function f(){return "😄😄".split(/(?:)/u)}', self._test('function f(){return "😄😄".split(/(?:)/u)}',
['😄', '😄']) ['😄', '😄'])

View File

@ -206,6 +206,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
), ),
(
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
),
] ]

View File

@ -1665,37 +1665,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) # or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) # or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s* # old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b) # older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
\((?:[\w$()\s]+,)*?\s* # ( # (expr, ...,
(?P<b>[a-zA-Z])\s*=\s* # b=, R= \((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
(?: # b=...
(?: # expect ,c=a.get(b) (etc) (?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| \.\s*[\w$]+ |
"n+"\[\s*\+?s*[\w$.]+\s*] \[\s*[\w$]+\s*\] |
)\s*(?:,[\w$()\s]+(?=,))*| \.\s*get\s*\(\s*[\w$"]+\s*\)
(?P<old>[\w$]+) # a (old[er]) )\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
)\s* \)\s*&&\s*\( # ...)&&(
(?(old) # b = nfunc, b = narray[idx]
# b.get("n") (?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? (?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\)) # (...)
| # ,c=a.get(b) \(\s*[\w$]+\s*\)
,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'), ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None)) default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611 # thx bashonly: yt-dlp/yt-dlp/pull/10611