mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-22 20:20:09 +09:00
Compare commits
12 Commits
d89280a81c
...
da457f9762
Author | SHA1 | Date | |
---|---|---|---|
|
da457f9762 | ||
|
1e6397b4ac | ||
|
54c145a24f | ||
|
1ebb56cdb8 | ||
|
c1ebf43139 | ||
|
8e172670f4 | ||
|
57b0e045d4 | ||
|
9993e8c5a9 | ||
|
7d3045ce12 | ||
|
527c137e4e | ||
|
7e93935dda | ||
|
af7c0ffc2f |
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -11,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import math
|
||||
import re
|
||||
|
||||
from youtube_dl.compat import compat_str
|
||||
from youtube_dl.compat import compat_str as str
|
||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||
|
||||
NaN = object()
|
||||
@ -19,7 +20,7 @@ NaN = object()
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
def _test(self, jsi_or_code, expected, func='f', args=()):
|
||||
if isinstance(jsi_or_code, compat_str):
|
||||
if isinstance(jsi_or_code, str):
|
||||
jsi_or_code = JSInterpreter(jsi_or_code)
|
||||
got = jsi_or_code.call_function(func, *args)
|
||||
if expected is NaN:
|
||||
@ -40,16 +41,27 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 42 + 7;}', 49)
|
||||
self._test('function f(){return 42 + undefined;}', NaN)
|
||||
self._test('function f(){return 42 + null;}', 42)
|
||||
self._test('function f(){return 1 + "";}', '1')
|
||||
self._test('function f(){return 42 + "7";}', '427')
|
||||
self._test('function f(){return false + true;}', 1)
|
||||
self._test('function f(){return "false" + true;}', 'falsetrue')
|
||||
self._test('function f(){return '
|
||||
'1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
|
||||
'123,4[object Object]nullundefinedInfinity')
|
||||
|
||||
def test_sub(self):
|
||||
self._test('function f(){return 42 - 7;}', 35)
|
||||
self._test('function f(){return 42 - undefined;}', NaN)
|
||||
self._test('function f(){return 42 - null;}', 42)
|
||||
self._test('function f(){return 42 - "7";}', 35)
|
||||
self._test('function f(){return 42 - "spam";}', NaN)
|
||||
|
||||
def test_mul(self):
|
||||
self._test('function f(){return 42 * 7;}', 294)
|
||||
self._test('function f(){return 42 * undefined;}', NaN)
|
||||
self._test('function f(){return 42 * null;}', 0)
|
||||
self._test('function f(){return 42 * "7";}', 294)
|
||||
self._test('function f(){return 42 * "eggs";}', NaN)
|
||||
|
||||
def test_div(self):
|
||||
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||
@ -57,17 +69,26 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, NaN, args=(JS_Undefined, 1))
|
||||
self._test(jsi, float('inf'), args=(2, 0))
|
||||
self._test(jsi, 0, args=(0, 3))
|
||||
self._test(jsi, 6, args=(42, 7))
|
||||
self._test(jsi, 0, args=(42, float('inf')))
|
||||
self._test(jsi, 6, args=("42", 7))
|
||||
self._test(jsi, NaN, args=("spam", 7))
|
||||
|
||||
def test_mod(self):
|
||||
self._test('function f(){return 42 % 7;}', 0)
|
||||
self._test('function f(){return 42 % 0;}', NaN)
|
||||
self._test('function f(){return 42 % undefined;}', NaN)
|
||||
self._test('function f(){return 42 % "7";}', 0)
|
||||
self._test('function f(){return 42 % "beans";}', NaN)
|
||||
|
||||
def test_exp(self):
|
||||
self._test('function f(){return 42 ** 2;}', 1764)
|
||||
self._test('function f(){return 42 ** undefined;}', NaN)
|
||||
self._test('function f(){return 42 ** null;}', 1)
|
||||
self._test('function f(){return undefined ** 0;}', 1)
|
||||
self._test('function f(){return undefined ** 42;}', NaN)
|
||||
self._test('function f(){return 42 ** "2";}', 1764)
|
||||
self._test('function f(){return 42 ** "spam";}', NaN)
|
||||
|
||||
def test_calc(self):
|
||||
self._test('function f(a){return 2*a+1;}', 7, args=[3])
|
||||
@ -89,7 +110,35 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 19 & 21;}', 17)
|
||||
self._test('function f(){return 11 >> 2;}', 2)
|
||||
self._test('function f(){return []? 2+3: 4;}', 5)
|
||||
# equality
|
||||
self._test('function f(){return 1 == 1}', True)
|
||||
self._test('function f(){return 1 == 1.0}', True)
|
||||
self._test('function f(){return 1 == "1"}', True)
|
||||
self._test('function f(){return 1 == 2}', False)
|
||||
self._test('function f(){return 1 != "1"}', False)
|
||||
self._test('function f(){return 1 != 2}', True)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
|
||||
self._test('function f(){return NaN == NaN}', False)
|
||||
self._test('function f(){return null == undefined}', True)
|
||||
self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
|
||||
# strict equality
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === 1.0}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 === 2}', False)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
|
||||
self._test('function f(){return NaN === NaN}', False)
|
||||
self._test('function f(){return null === undefined}', False)
|
||||
self._test('function f(){return null === null}', True)
|
||||
self._test('function f(){return undefined === undefined}', True)
|
||||
self._test('function f(){return "uninterned" === "uninterned"}', True)
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 !== 1}', False)
|
||||
self._test('function f(){return 1 !== "1"}', True)
|
||||
# expressions
|
||||
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||
self._test('function f(){return 0 ?? 42;}', 0)
|
||||
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||
@ -111,7 +160,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||
|
||||
@unittest.skip('Not yet fully implemented')
|
||||
def test_comments(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -130,6 +178,15 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}
|
||||
''', 3)
|
||||
|
||||
self._test('''
|
||||
function f() {
|
||||
var x = ( /* 1 + */ 2 +
|
||||
/* 30 * 40 */
|
||||
50);
|
||||
return x;
|
||||
}
|
||||
''', 52)
|
||||
|
||||
def test_precedence(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -296,7 +353,7 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
def test_undefined(self):
|
||||
self._test('function f() { return undefined === undefined; }', True)
|
||||
self._test('function f() { return undefined; }', JS_Undefined)
|
||||
self._test('function f() {return undefined ?? 42; }', 42)
|
||||
self._test('function f() { return undefined ?? 42; }', 42)
|
||||
self._test('function f() { let v; return v; }', JS_Undefined)
|
||||
self._test('function f() { let v; return v**0; }', 1)
|
||||
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
|
||||
@ -337,6 +394,16 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||
|
||||
def test_indexing(self):
|
||||
self._test('function f() { return [1, 2, 3, 4][3]}', 4)
|
||||
self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
|
||||
self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
|
||||
self._test('function f() { return [1, 2, 3, 4].length}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
|
||||
|
||||
def test_regex(self):
|
||||
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||
|
||||
@ -424,6 +491,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||
self._test(jsi, '', args=[[], '-'])
|
||||
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
|
||||
'1,1,abc,[object Object],,,Infinity,NaN')
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
|
||||
'1~1~abc~[object Object]~~~Infinity~NaN')
|
||||
|
||||
def test_split(self):
|
||||
test_result = list('test')
|
||||
tests = [
|
||||
@ -437,6 +511,16 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||
self._test(jsi, [''], args=['', '-'])
|
||||
self._test(jsi, [], args=['', ''])
|
||||
# RegExp split
|
||||
self._test('function f(){return "test".split(/(?:)/)}',
|
||||
['t', 'e', 's', 't'])
|
||||
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
|
||||
['t', 't'])
|
||||
# from MDN: surrogate pairs aren't handled: case 1 fails
|
||||
# self._test('function f(){return "😄😄".split(/(?:)/)}',
|
||||
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
|
||||
self._test('function f(){return "😄😄".split(/(?:)/u)}',
|
||||
['😄', '😄'])
|
||||
|
||||
def test_slice(self):
|
||||
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||
@ -466,6 +550,40 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||
|
||||
def test_pop(self):
|
||||
# pop
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
||||
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
|
||||
self._test('function f(){return [].pop()}', JS_Undefined)
|
||||
# push
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
|
||||
[5, [0, 1, 2, 3, 4]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_shift(self):
|
||||
# shift
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
|
||||
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
|
||||
self._test('function f(){return [].shift()}', JS_Undefined)
|
||||
# unshift
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
|
||||
[5, [3, 4, 0, 1, 2]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_forEach(self):
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){ret.push([e,i,a]);}; '
|
||||
'l.forEach(log); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){this.push([e,i,a]);}; '
|
||||
'l.forEach(log, ret); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -275,7 +276,8 @@ def signature(jscode, sig_input):
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||
return JSInterpreter(jscode).call_function(
|
||||
funcname, sig_input, _ytdl_do_not_return=sig_input)
|
||||
|
||||
|
||||
make_sig_test = t_factory(
|
||||
|
@ -3170,7 +3170,7 @@ class InfoExtractor(object):
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
@ -1740,7 +1740,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def extract_nsig(s):
|
||||
try:
|
||||
ret = func([s])
|
||||
ret = func([s], kwargs={'_ytdl_do_not_return': s})
|
||||
except JSInterpreter.Exception:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
@ -10,6 +11,7 @@ from functools import update_wrapper, wraps
|
||||
from .utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
remove_quotes,
|
||||
unified_timestamp,
|
||||
@ -20,6 +22,7 @@ from .compat import (
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_collections_chain_map as ChainMap,
|
||||
compat_contextlib_suppress,
|
||||
compat_filter as filter,
|
||||
compat_itertools_zip_longest as zip_longest,
|
||||
compat_map as map,
|
||||
@ -63,6 +66,10 @@ _NaN = float('nan')
|
||||
_Infinity = float('inf')
|
||||
|
||||
|
||||
class JS_Undefined(object):
|
||||
pass
|
||||
|
||||
|
||||
def _js_bit_op(op):
|
||||
|
||||
def zeroise(x):
|
||||
@ -75,43 +82,114 @@ def _js_bit_op(op):
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_arith_op(op):
|
||||
def _js_arith_op(op, div=False):
|
||||
|
||||
@wraps_op(op)
|
||||
def wrapped(a, b):
|
||||
if JS_Undefined in (a, b):
|
||||
return _NaN
|
||||
return op(a or 0, b or 0)
|
||||
# null, "" --> 0
|
||||
a, b = (float_or_none(
|
||||
(x.strip() if isinstance(x, compat_basestring) else x) or 0,
|
||||
default=_NaN) for x in (a, b))
|
||||
if _NaN in (a, b):
|
||||
return _NaN
|
||||
try:
|
||||
return op(a, b)
|
||||
except ZeroDivisionError:
|
||||
return _NaN if not (div and (a or b)) else _Infinity
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_div(a, b):
|
||||
if JS_Undefined in (a, b) or not (a or b):
|
||||
return _NaN
|
||||
return operator.truediv(a or 0, b) if b else _Infinity
|
||||
_js_arith_add = _js_arith_op(operator.add)
|
||||
|
||||
|
||||
def _js_mod(a, b):
|
||||
if JS_Undefined in (a, b) or not b:
|
||||
return _NaN
|
||||
return (a or 0) % b
|
||||
def _js_add(a, b):
|
||||
if not (isinstance(a, compat_basestring) or isinstance(b, compat_basestring)):
|
||||
return _js_arith_add(a, b)
|
||||
if not isinstance(a, compat_basestring):
|
||||
a = _js_toString(a)
|
||||
elif not isinstance(b, compat_basestring):
|
||||
b = _js_toString(b)
|
||||
return operator.concat(a, b)
|
||||
|
||||
|
||||
_js_mod = _js_arith_op(operator.mod)
|
||||
__js_exp = _js_arith_op(operator.pow)
|
||||
|
||||
|
||||
def _js_exp(a, b):
|
||||
if not b:
|
||||
return 1 # even 0 ** 0 !!
|
||||
elif JS_Undefined in (a, b):
|
||||
return _NaN
|
||||
return (a or 0) ** b
|
||||
return __js_exp(a, b)
|
||||
|
||||
|
||||
def _js_eq_op(op):
|
||||
def _js_to_primitive(v):
|
||||
return (
|
||||
','.join(map(_js_toString, v)) if isinstance(v, list)
|
||||
else '[object Object]' if isinstance(v, dict)
|
||||
else compat_str(v) if not isinstance(v, (
|
||||
compat_numeric_types, compat_basestring))
|
||||
else v
|
||||
)
|
||||
|
||||
|
||||
def _js_toString(v):
|
||||
return (
|
||||
'undefined' if v is JS_Undefined
|
||||
else 'Infinity' if v == _Infinity
|
||||
else 'NaN' if v is _NaN
|
||||
else 'null' if v is None
|
||||
# bool <= int: do this first
|
||||
else ('false', 'true')[v] if isinstance(v, bool)
|
||||
else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types)
|
||||
else _js_to_primitive(v))
|
||||
|
||||
|
||||
_nullish = frozenset((None, JS_Undefined))
|
||||
|
||||
|
||||
def _js_eq(a, b):
|
||||
# NaN != any
|
||||
if _NaN in (a, b):
|
||||
return False
|
||||
# Object is Object
|
||||
if isinstance(a, type(b)) and isinstance(b, (dict, list)):
|
||||
return operator.is_(a, b)
|
||||
# general case
|
||||
if a == b:
|
||||
return True
|
||||
# null == undefined
|
||||
a_b = set((a, b))
|
||||
if a_b & _nullish:
|
||||
return a_b <= _nullish
|
||||
a, b = _js_to_primitive(a), _js_to_primitive(b)
|
||||
if not isinstance(a, compat_basestring):
|
||||
a, b = b, a
|
||||
# Number to String: convert the string to a number
|
||||
# Conversion failure results in ... false
|
||||
if isinstance(a, compat_basestring):
|
||||
return float_or_none(a) == b
|
||||
return a == b
|
||||
|
||||
|
||||
def _js_neq(a, b):
|
||||
return not _js_eq(a, b)
|
||||
|
||||
|
||||
def _js_id_op(op):
|
||||
|
||||
@wraps_op(op)
|
||||
def wrapped(a, b):
|
||||
if set((a, b)) <= set((None, JS_Undefined)):
|
||||
return op(a, a)
|
||||
if _NaN in (a, b):
|
||||
return op(_NaN, None)
|
||||
if not isinstance(a, (compat_basestring, compat_numeric_types)):
|
||||
a, b = b, a
|
||||
# strings are === if ==
|
||||
# why 'a' is not 'a': https://stackoverflow.com/a/1504848
|
||||
if isinstance(a, (compat_basestring, compat_numeric_types)):
|
||||
return a == b if op(0, 0) else a != b
|
||||
return op(a, b)
|
||||
|
||||
return wrapped
|
||||
@ -150,8 +228,8 @@ def _js_unary_op(op):
|
||||
|
||||
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
|
||||
def _js_typeof(expr):
|
||||
try:
|
||||
result = {
|
||||
with compat_contextlib_suppress(TypeError, KeyError):
|
||||
return {
|
||||
JS_Undefined: 'undefined',
|
||||
_NaN: 'number',
|
||||
_Infinity: 'number',
|
||||
@ -159,21 +237,16 @@ def _js_typeof(expr):
|
||||
False: 'boolean',
|
||||
None: 'object',
|
||||
}[expr]
|
||||
except (TypeError, KeyError):
|
||||
result = None
|
||||
if result is None:
|
||||
for t, n in (
|
||||
(compat_basestring, 'string'),
|
||||
(compat_numeric_types, 'number'),
|
||||
):
|
||||
if isinstance(expr, t):
|
||||
result = n
|
||||
break
|
||||
else:
|
||||
if callable(expr):
|
||||
result = 'function'
|
||||
for t, n in (
|
||||
(compat_basestring, 'string'),
|
||||
(compat_numeric_types, 'number'),
|
||||
):
|
||||
if isinstance(expr, t):
|
||||
return n
|
||||
if callable(expr):
|
||||
return 'function'
|
||||
# TODO: Symbol, BigInt
|
||||
return 'object' if result is None else result
|
||||
return 'object'
|
||||
|
||||
|
||||
# (op, definition) in order of binding priority, tightest first
|
||||
@ -182,19 +255,19 @@ def _js_typeof(expr):
|
||||
_OPERATORS = (
|
||||
('>>', _js_bit_op(operator.rshift)),
|
||||
('<<', _js_bit_op(operator.lshift)),
|
||||
('+', _js_arith_op(operator.add)),
|
||||
('+', _js_add),
|
||||
('-', _js_arith_op(operator.sub)),
|
||||
('*', _js_arith_op(operator.mul)),
|
||||
('%', _js_mod),
|
||||
('/', _js_div),
|
||||
('/', _js_arith_op(operator.truediv, div=True)),
|
||||
('**', _js_exp),
|
||||
)
|
||||
|
||||
_COMP_OPERATORS = (
|
||||
('===', operator.is_),
|
||||
('!==', operator.is_not),
|
||||
('==', _js_eq_op(operator.eq)),
|
||||
('!=', _js_eq_op(operator.ne)),
|
||||
('===', _js_id_op(operator.is_)),
|
||||
('!==', _js_id_op(operator.is_not)),
|
||||
('==', _js_eq),
|
||||
('!=', _js_neq),
|
||||
('<=', _js_comp_op(operator.le)),
|
||||
('>=', _js_comp_op(operator.ge)),
|
||||
('<', _js_comp_op(operator.lt)),
|
||||
@ -226,10 +299,6 @@ _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||
_QUOTES = '\'"/'
|
||||
|
||||
|
||||
class JS_Undefined(object):
|
||||
pass
|
||||
|
||||
|
||||
class JS_Break(ExtractorError):
|
||||
def __init__(self):
|
||||
ExtractorError.__init__(self, 'Invalid break')
|
||||
@ -299,7 +368,7 @@ class Debugger(object):
|
||||
raise
|
||||
if cls.ENABLED and stmt.strip():
|
||||
if should_ret or repr(ret) != stmt:
|
||||
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
cls.write(['->', '=>'][bool(should_ret)], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
return ret, should_ret
|
||||
return interpret_statement
|
||||
|
||||
@ -328,6 +397,9 @@ class JSInterpreter(object):
|
||||
RE_FLAGS = {
|
||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||
# invent new bitmask values well above that for literal parsing
|
||||
# JS 'u' flag is effectively always set (surrogate pairs aren't seen),
|
||||
# but \u{...} and \p{...} escapes aren't handled); no additional JS 'v'
|
||||
# features are supported
|
||||
# TODO: execute matches with these flags (remaining: d, y)
|
||||
'd': 1024, # Generate indices for substring matches
|
||||
'g': 2048, # Global search
|
||||
@ -335,6 +407,7 @@ class JSInterpreter(object):
|
||||
'm': re.M, # Multi-line search
|
||||
's': re.S, # Allows . to match newline characters
|
||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||
'v': re.U, # Like 'u' with extended character class and \p{} syntax
|
||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||
}
|
||||
|
||||
@ -415,9 +488,18 @@ class JSInterpreter(object):
|
||||
skipping = 0
|
||||
if skip_delims:
|
||||
skip_delims = variadic(skip_delims)
|
||||
skip_txt = None
|
||||
for idx, char in enumerate(expr):
|
||||
if skip_txt and idx <= skip_txt[1]:
|
||||
continue
|
||||
paren_delta = 0
|
||||
if not in_quote:
|
||||
if char == '/' and expr[idx:idx + 2] == '/*':
|
||||
# skip a comment
|
||||
skip_txt = expr[idx:].find('*/', 2)
|
||||
skip_txt = [idx, idx + skip_txt + 1] if skip_txt >= 2 else None
|
||||
if skip_txt:
|
||||
continue
|
||||
if char in _MATCHING_PARENS:
|
||||
counters[_MATCHING_PARENS[char]] += 1
|
||||
paren_delta = 1
|
||||
@ -450,12 +532,19 @@ class JSInterpreter(object):
|
||||
if pos < delim_len:
|
||||
pos += 1
|
||||
continue
|
||||
yield expr[start: idx - delim_len]
|
||||
if skip_txt and skip_txt[0] >= start and skip_txt[1] <= idx - delim_len:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1: idx - delim_len]
|
||||
else:
|
||||
yield expr[start: idx - delim_len]
|
||||
skip_txt = None
|
||||
start, pos = idx + 1, 0
|
||||
splits += 1
|
||||
if max_split and splits >= max_split:
|
||||
break
|
||||
yield expr[start:]
|
||||
if skip_txt and skip_txt[0] >= start:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1:]
|
||||
else:
|
||||
yield expr[start:]
|
||||
|
||||
@classmethod
|
||||
def _separate_at_paren(cls, expr, delim=None):
|
||||
@ -495,13 +584,14 @@ class JSInterpreter(object):
|
||||
except Exception as e:
|
||||
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
||||
|
||||
def _index(self, obj, idx, allow_undefined=False):
|
||||
if idx == 'length':
|
||||
def _index(self, obj, idx, allow_undefined=True):
|
||||
if idx == 'length' and isinstance(obj, list):
|
||||
return len(obj)
|
||||
try:
|
||||
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
|
||||
except Exception as e:
|
||||
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
||||
except (TypeError, KeyError, IndexError) as e:
|
||||
if allow_undefined:
|
||||
# when is not allowed?
|
||||
return JS_Undefined
|
||||
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||
|
||||
@ -513,7 +603,7 @@ class JSInterpreter(object):
|
||||
|
||||
# used below
|
||||
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
||||
(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
|
||||
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
|
||||
''')
|
||||
_COMPOUND_RE = re.compile(r'''(?x)
|
||||
(?P<try>try)\s*\{|
|
||||
@ -593,7 +683,7 @@ class JSInterpreter(object):
|
||||
expr = stmt[len(m.group(0)):].strip()
|
||||
if m.group('throw'):
|
||||
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
|
||||
should_return = not m.group('var')
|
||||
should_return = 'return' if m.group('ret') else False
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
@ -818,7 +908,7 @@ class JSInterpreter(object):
|
||||
start, end = m.span()
|
||||
sign = m.group('pre_sign') or m.group('post_sign')
|
||||
ret = local_vars[var]
|
||||
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||
local_vars[var] = _js_add(ret, 1 if sign[0] == '+' else -1)
|
||||
if m.group('pre_sign'):
|
||||
ret = local_vars[var]
|
||||
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
||||
@ -828,13 +918,13 @@ class JSInterpreter(object):
|
||||
|
||||
m = re.match(r'''(?x)
|
||||
(?P<assign>
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
|
||||
(?P<op>{_OPERATOR_RE})?
|
||||
=(?!=)(?P<expr>.*)$
|
||||
)|(?P<return>
|
||||
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||
)|(?P<indexing>
|
||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
|
||||
)|(?P<attribute>
|
||||
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
||||
)|(?P<function>
|
||||
@ -844,19 +934,23 @@ class JSInterpreter(object):
|
||||
if md.get('assign'):
|
||||
left_val = local_vars.get(m.group('out'))
|
||||
|
||||
if not m.group('index'):
|
||||
if not m.group('out_idx'):
|
||||
local_vars[m.group('out')] = self._operator(
|
||||
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return local_vars[m.group('out')], should_return
|
||||
elif left_val in (None, JS_Undefined):
|
||||
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
||||
|
||||
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
|
||||
if not isinstance(idx, (int, float)):
|
||||
raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
|
||||
idx = int(idx)
|
||||
indexes = re.split(r'\]\s*\[', m.group('out_idx'))
|
||||
for i, idx in enumerate(indexes, 1):
|
||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||
if i < len(indexes):
|
||||
left_val = self._index(left_val, idx)
|
||||
if isinstance(idx, float):
|
||||
idx = int(idx)
|
||||
left_val[idx] = self._operator(
|
||||
m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
||||
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
||||
m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return left_val[idx], should_return
|
||||
|
||||
elif expr.isdigit():
|
||||
@ -874,19 +968,27 @@ class JSInterpreter(object):
|
||||
return _Infinity, should_return
|
||||
|
||||
elif md.get('return'):
|
||||
return local_vars[m.group('name')], should_return
|
||||
ret = local_vars[m.group('name')]
|
||||
# challenge may try to force returning the original value
|
||||
# use an optional internal var to block this
|
||||
if should_return == 'return':
|
||||
if '_ytdl_do_not_return' not in local_vars:
|
||||
return ret, True
|
||||
return (ret, True) if ret != local_vars['_ytdl_do_not_return'] else (ret, False)
|
||||
else:
|
||||
return ret, should_return
|
||||
|
||||
try:
|
||||
with compat_contextlib_suppress(ValueError):
|
||||
ret = json.loads(js_to_json(expr)) # strict=True)
|
||||
if not md.get('attribute'):
|
||||
return ret, should_return
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if md.get('indexing'):
|
||||
val = local_vars[m.group('in')]
|
||||
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
|
||||
return self._index(val, idx), should_return
|
||||
for idx in re.split(r'\]\s*\[', m.group('in_idx')):
|
||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||
val = self._index(val, idx)
|
||||
return val, should_return
|
||||
|
||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
||||
if op_result:
|
||||
@ -935,7 +1037,7 @@ class JSInterpreter(object):
|
||||
|
||||
# Member access
|
||||
if arg_str is None:
|
||||
return self._index(obj, member, nullish)
|
||||
return self._index(obj, member)
|
||||
|
||||
# Function call
|
||||
argvals = [
|
||||
@ -962,7 +1064,7 @@ class JSInterpreter(object):
|
||||
if obj is compat_str:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
return ''.join(map(compat_chr, argvals))
|
||||
return ''.join(compat_chr(int(n)) for n in argvals)
|
||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||
elif obj is float:
|
||||
if member == 'pow':
|
||||
@ -971,13 +1073,47 @@ class JSInterpreter(object):
|
||||
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
||||
|
||||
if member == 'split':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
||||
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
||||
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
||||
if len(argvals) > 1:
|
||||
limit = argvals[1]
|
||||
assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
|
||||
if limit == 0:
|
||||
return []
|
||||
else:
|
||||
limit = 0
|
||||
if len(argvals) == 0:
|
||||
argvals = [JS_Undefined]
|
||||
elif isinstance(argvals[0], self.JS_RegExp):
|
||||
# avoid re.split(), similar but not enough
|
||||
|
||||
def where():
|
||||
for m in argvals[0].finditer(obj):
|
||||
yield m.span(0)
|
||||
yield (None, None)
|
||||
|
||||
def splits(limit=limit):
|
||||
i = 0
|
||||
for j, jj in where():
|
||||
if j == jj == 0:
|
||||
continue
|
||||
if j is None and i >= len(obj):
|
||||
break
|
||||
yield obj[i:j]
|
||||
if jj is None or limit == 1:
|
||||
break
|
||||
limit -= 1
|
||||
i = jj
|
||||
|
||||
return list(splits())
|
||||
return (
|
||||
obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
|
||||
else list(obj)[:limit or None])
|
||||
elif member == 'join':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
return argvals[0].join(obj)
|
||||
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||
return (',' if len(argvals) == 0 else argvals[0]).join(
|
||||
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||
for x in obj)
|
||||
elif member == 'reverse':
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
obj.reverse()
|
||||
@ -999,37 +1135,31 @@ class JSInterpreter(object):
|
||||
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||
if index < 0:
|
||||
index += len(obj)
|
||||
add_items = argvals[2:]
|
||||
res = []
|
||||
for _ in range(index, min(index + how_many, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
for i, item in enumerate(add_items):
|
||||
obj.insert(index + i, item)
|
||||
res = [obj.pop(index)
|
||||
for _ in range(index, min(index + how_many, len(obj)))]
|
||||
obj[index:index] = argvals[2:]
|
||||
return res
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
for item in reversed(argvals):
|
||||
obj.insert(0, item)
|
||||
return obj
|
||||
elif member == 'pop':
|
||||
elif member in ('shift', 'pop'):
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
if not obj:
|
||||
return
|
||||
return obj.pop()
|
||||
return obj.pop(0 if member == 'shift' else -1) if len(obj) > 0 else JS_Undefined
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj[0:0] = argvals
|
||||
return len(obj)
|
||||
elif member == 'push':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj.extend(argvals)
|
||||
return obj
|
||||
return len(obj)
|
||||
elif member == 'forEach':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
f, this = (argvals + [''])[:2]
|
||||
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
|
||||
elif member == 'indexOf':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
idx, start = (argvals + [0])[:2]
|
||||
try:
|
||||
return obj.index(idx, start)
|
||||
@ -1038,7 +1168,7 @@ class JSInterpreter(object):
|
||||
elif member == 'charCodeAt':
|
||||
assertion(isinstance(obj, compat_str), 'must be applied on a string')
|
||||
# assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
|
||||
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
||||
idx = argvals[0] if len(argvals) > 0 and isinstance(argvals[0], int) else 0
|
||||
if idx >= len(obj):
|
||||
return None
|
||||
return ord(obj[idx])
|
||||
@ -1089,7 +1219,7 @@ class JSInterpreter(object):
|
||||
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||
|
||||
def extract_object(self, objname):
|
||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
||||
obj = {}
|
||||
fields = next(filter(None, (
|
||||
obj_m.group('fields') for obj_m in re.finditer(
|
||||
@ -1148,6 +1278,7 @@ class JSInterpreter(object):
|
||||
|
||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||
local_vars = {}
|
||||
|
||||
while True:
|
||||
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
||||
if mobj is None:
|
||||
@ -1158,10 +1289,11 @@ class JSInterpreter(object):
|
||||
[x.strip() for x in mobj.group('args').split(',')],
|
||||
body, local_vars, *global_stack))
|
||||
code = code[:start] + name + remaining
|
||||
|
||||
return self.build_function(argnames, code, local_vars, *global_stack)
|
||||
|
||||
def call_function(self, funcname, *args):
|
||||
return self.extract_function(funcname)(args)
|
||||
def call_function(self, funcname, *args, **kw_global_vars):
|
||||
return self.extract_function(funcname)(args, kw_global_vars)
|
||||
|
||||
@classmethod
|
||||
def build_arglist(cls, arg_text):
|
||||
@ -1180,8 +1312,9 @@ class JSInterpreter(object):
|
||||
global_stack = list(global_stack) or [{}]
|
||||
argnames = tuple(argnames)
|
||||
|
||||
def resf(args, kwargs={}, allow_recursion=100):
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||
def resf(args, kwargs=None, allow_recursion=100):
|
||||
kwargs = kwargs or {}
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=JS_Undefined))
|
||||
global_stack[0].update(kwargs)
|
||||
var_stack = LocalNameSpace(*global_stack)
|
||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||
|
Loading…
Reference in New Issue
Block a user