Compare commits

..

9 Commits

Author SHA1 Message Date
dirkf
9f17a6dff6
Merge 65ea8e014a into c5098961b0 2024-12-12 16:32:27 +00:00
dirkf
65ea8e014a
Again but better 2024-12-12 16:32:25 +00:00
dirkf
803210d287
Unicode matching too hard for 3.2? 2024-12-12 16:26:57 +00:00
dirkf
1e6397b4ac [YouTube] Pass nsig value as return hook, fixes player 3bb1f723 2024-12-12 11:51:22 +00:00
dirkf
54c145a24f [YouTube] Handle player 3bb1f723
* fix signature code extraction
* raise if n function returns input value
* add new tests from yt-dlp

Co-authored-by: bashonly
2024-12-12 11:42:15 +00:00
dirkf
1ebb56cdb8 [jsinterp] Add return hook for player 3bb1f723
* set var `_ytdl_do_not_return` to a specific value in the scope of a function
* if an expression to be returned has that value, `return` becomes `void`
2024-12-12 11:42:15 +00:00
dirkf
c1ebf43139 [jsinterp] Strip /* comments */ when parsing
* NB: _separate() is looking creaky
2024-12-12 11:42:15 +00:00
dirkf
8e172670f4 [jsinterp] Fix and improve "methods"
* push, unshift return new length
* improve edge cases for push/pop, shift/unshift, forEach, indexOf, charCodeAt
* increase test coverage
2024-12-12 11:42:15 +00:00
dirkf
57b0e045d4 [jsinterp] Fix and improve split/join
* improve split/join edge cases
* correctly implement regex split (not like re.split)
2024-12-12 11:42:15 +00:00
3 changed files with 30 additions and 27 deletions

View File

@ -516,11 +516,10 @@ class TestJSInterpreter(unittest.TestCase):
['t', 'e', 's', 't'])
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
['t', 't'])
# from MDN: surrogate pairs aren't handled: case 1 fails
# from MDN: surrogate pairs aren't handled: case 1 fails, and case 2 beats Py3.2
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
# self._test('function f(){return "😄😄".split(/(?:)/)}',
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
# case 2 beats Py3.2: it gets the case 1 result
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
self._test('function f(){return "😄😄".split(/(?:)/u)}',
['😄', '😄'])

View File

@ -206,10 +206,6 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
),
]

View File

@ -1665,29 +1665,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex(
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
# or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x)
# (expr, ...,
\((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
# b=...
(?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
\.\s*[\w$]+ |
\[\s*[\w$]+\s*\] |
\.\s*get\s*\(\s*[\w$"]+\s*\)
)\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
\)\s*&&\s*\( # ...)&&(
# b = nfunc, b = narray[idx]
(?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
(?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
# (...)
\(\s*[\w$]+\s*\)
\((?:[\w$()\s]+,)*?\s* # (
(?P<b>[a-zA-Z])\s*=\s* # b=, R=
(?:
(?: # expect ,c=a.get(b) (etc)
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
"n+"\[\s*\+?s*[\w$.]+\s*]
)\s*(?:,[\w$()\s]+(?=,))*|
(?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611