From e02b214fdff4b41719ac25737c3e928db1e6f3fe Mon Sep 17 00:00:00 2001
From: dirkf
Date: Sat, 7 Dec 2024 03:37:39 +0000
Subject: [PATCH] [jsinterp] Implement `typeof` operator

---
Review notes (free-form commentary after the three-dash line; not part of
the commit message and not part of any hunk):

* What the patch does, as shown by the hunks below:
  - adds `_js_unary_op()`, which adapts a one-argument function to the
    two-argument (left, right) operator signature by discarding the
    left operand;
  - adds `_js_typeof()` and registers `void` and `typeof` in the new
    `_UNARY_OPERATORS_X` table, which is appended to the operator list
    built by `JSInterpreter._all_operators()`;
  - moves the operator-splitting loop out of `interpret_statement()`
    into the new method `handle_operators()`, which returns
    `(value, True)` when an operator was applied and otherwise falls
    off the end (implicit `None`); both call sites use only
    `op_result[0]`;
  - `void <expr>` now evaluates to `JS_Undefined` instead of `None`
    (the test is updated accordingly);
  - `__op_chars()` skips operators whose name starts with a letter, so
    the letters of `void`/`typeof` are not added to the operator
    character set;
  - the `if` handling passes `' %s;' % (expr,)` rather than `expr` to
    `_separate_at_paren(..., delim=';')`.

* NOTE(review): `_js_typeof()` looks its argument up in a dict that has
  `True` and `False` as keys. In Python `1 == True` and `0 == False`,
  with equal hashes, so a numeric 0 or 1 would presumably select the
  'boolean' entry rather than reaching the `compat_numeric_types`
  check. The added tests only cover 42 and 42.42 -- confirm with
  `typeof 0` / `typeof 1`.

* NOTE(review): a unary keyword is only recognised when it is followed
  by a literal space (`operand[0] != ' '`); `typeof(x)` or a tab
  separator would fall through to the later handlers -- confirm that
  this is intended.

* NOTE(review): `_js_typeof()` uses `compat_basestring`, but this patch
  only adds `compat_numeric_types` to the `.compat` import list;
  presumably `compat_basestring` is already imported above the visible
  context -- verify.

* NOTE(review): this copy of the patch appears to have lost text in
  extraction: the `From:` header carries no address, and the context
  line `m = re.match(r'else\s*(?P\{)?', expr)` looks like a named group
  whose `<name>` was stripped. Both are reproduced as found; compare
  with the upstream commit before applying.

 test/test_jsinterp.py  |  15 +++-
 youtube_dl/jsinterp.py | 156 ++++++++++++++++++++++++++++-------------
 2 files changed, 121 insertions(+), 50 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c7a4f2cbf..d063bbd36 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -266,7 +266,20 @@ class TestJSInterpreter(unittest.TestCase):
         self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
 
     def test_void(self):
-        self._test('function f() { return void 42; }', None)
+        self._test('function f() { return void 42; }', JS_Undefined)
+
+    def test_typeof(self):
+        self._test('function f() { return typeof undefined; }', 'undefined')
+        self._test('function f() { return typeof NaN; }', 'number')
+        self._test('function f() { return typeof Infinity; }', 'number')
+        self._test('function f() { return typeof true; }', 'boolean')
+        self._test('function f() { return typeof null; }', 'object')
+        self._test('function f() { return typeof "a string"; }', 'string')
+        self._test('function f() { return typeof 42; }', 'number')
+        self._test('function f() { return typeof 42.42; }', 'number')
+        self._test('function f() { var g = function(){}; return typeof g; }', 'function')
+        self._test('function f() { return typeof {key: "value"}; }', 'object')
+        # not yet implemented: Symbol, BigInt
 
     def test_return_function(self):
         jsi = JSInterpreter('''
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a616ad070..2e6a3f56b 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -5,7 +5,7 @@ import json
 import operator
 import re
 
-from functools import update_wrapper
+from functools import update_wrapper, wraps
 
 from .utils import (
     error_to_compat_str,
@@ -23,6 +23,7 @@ from .compat import (
     compat_filter as filter,
     compat_itertools_zip_longest as zip_longest,
     compat_map as map,
+    compat_numeric_types,
     compat_str,
 )
 
@@ -138,6 +139,43 @@ def _js_ternary(cndn, if_true=True, if_false=False):
     return if_true
 
 
+def _js_unary_op(op):
+
+    @wraps_op(op)
+    def wrapped(_, a):
+        return op(a)
+
+    return wrapped
+
+
+# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
+def _js_typeof(expr):
+    try:
+        result = {
+            JS_Undefined: 'undefined',
+            _NaN: 'number',
+            _Infinity: 'number',
+            True: 'boolean',
+            False: 'boolean',
+            None: 'object',
+        }[expr]
+    except (TypeError, KeyError):
+        result = None
+    if result is None:
+        for t, n in (
+            (compat_basestring, 'string'),
+            (compat_numeric_types, 'number'),
+        ):
+            if isinstance(expr, t):
+                result = n
+                break
+        else:
+            if callable(expr):
+                result = 'function'
+    # TODO: Symbol, BigInt
+    return 'object' if result is None else result
+
+
 # (op, definition) in order of binding priority, tightest first
 # avoid dict to maintain order
 # definition None => Defined in JSInterpreter._operator
@@ -176,6 +214,11 @@ _SC_OPERATORS = (
     ('&&', None),
 )
 
+_UNARY_OPERATORS_X = (
+    ('void', _js_unary_op(lambda _: JS_Undefined)),
+    ('typeof', _js_unary_op(_js_typeof)),
+)
+
 _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
 
 _NAME_RE = r'[a-zA-Z_$][\w$]*'
@@ -242,6 +285,7 @@ class Debugger(object):
 
     @classmethod
     def wrap_interpreter(cls, f):
+        @wraps(f)
         def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
             if cls.ENABLED and stmt.strip():
                 cls.write(stmt, level=allow_recursion)
@@ -347,6 +391,8 @@ class JSInterpreter(object):
     def __op_chars(cls):
         op_chars = set(';,[')
         for op in cls._all_operators():
+            if op[0].isalpha():
+                continue
             op_chars.update(op[0])
         return op_chars
 
@@ -425,7 +471,7 @@ class JSInterpreter(object):
         if not _cached:
             _cached.extend(itertools.chain(
                 # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
-                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
+                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
         return _cached
 
     def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
@@ -479,6 +525,52 @@ class JSInterpreter(object):
     _FINALLY_RE = re.compile(r'finally\s*\{')
     _SWITCH_RE = re.compile(r'switch\s*\(')
 
+    def handle_operators(self, expr, local_vars, allow_recursion):
+
+        for op, _ in self._all_operators():
+            # hackety: have higher priority than <>, but don't confuse them
+            skip_delim = (op + op) if op in '<>*?' else None
+            if op == '?':
+                skip_delim = (skip_delim, '?.')
+            separated = list(self._separate(expr, op, skip_delims=skip_delim))
+            if len(separated) < 2:
+                continue
+
+            right_expr = separated.pop()
+            # handle operators that are both unary and binary, minimal BODMAS
+            if op in ('+', '-'):
+                # simplify/adjust consecutive instances of these operators
+                undone = 0
+                separated = [s.strip() for s in separated]
+                while len(separated) > 1 and not separated[-1]:
+                    undone += 1
+                    separated.pop()
+                if op == '-' and undone % 2 != 0:
+                    right_expr = op + right_expr
+                elif op == '+':
+                    while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
+                        right_expr = separated.pop() + right_expr
+                    if separated[-1][-1:] in self.OP_CHARS:
+                        right_expr = separated.pop() + right_expr
+                # hanging op at end of left => unary + (strip) or - (push right)
+                left_val = separated[-1] if separated else ''
+                for dm_op in ('*', '%', '/', '**'):
+                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
+                    if len(bodmas) > 1 and not bodmas[-1].strip():
+                        expr = op.join(separated) + op + right_expr
+                        if len(separated) > 1:
+                            separated.pop()
+                            right_expr = op.join((left_val, right_expr))
+                        else:
+                            separated = [op.join((left_val, right_expr))]
+                            right_expr = None
+                        break
+                if right_expr is None:
+                    continue
+
+            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
+
     @Debugger.wrap_interpreter
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
@@ -533,9 +625,15 @@ class JSInterpreter(object):
             else:
                 raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
 
-        if expr.startswith('void '):
-            left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
-            return None, should_return
+        for op, _ in _UNARY_OPERATORS_X:
+            if not expr.startswith(op):
+                continue
+            operand = expr[len(op):]
+            if not operand or operand[0] != ' ':
+                continue
+            op_result = self.handle_operators(expr, local_vars, allow_recursion)
+            if op_result:
+                return op_result[0], should_return
 
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr)
@@ -582,7 +680,7 @@ class JSInterpreter(object):
                 if_expr, expr = self._separate_at_paren(expr)
             else:
                 # may lose ... else ... because of ll.368-374
-                if_expr, expr = self._separate_at_paren(expr, delim=';')
+                if_expr, expr = self._separate_at_paren(' %s;' % (expr,), delim=';')
             else_expr = None
             m = re.match(r'else\s*(?P\{)?', expr)
             if m:
@@ -790,49 +888,9 @@ class JSInterpreter(object):
             idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
             return self._index(val, idx), should_return
 
-        for op, _ in self._all_operators():
-            # hackety: have higher priority than <>, but don't confuse them
-            skip_delim = (op + op) if op in '<>*?' else None
-            if op == '?':
-                skip_delim = (skip_delim, '?.')
-            separated = list(self._separate(expr, op, skip_delims=skip_delim))
-            if len(separated) < 2:
-                continue
-
-            right_expr = separated.pop()
-            # handle operators that are both unary and binary, minimal BODMAS
-            if op in ('+', '-'):
-                # simplify/adjust consecutive instances of these operators
-                undone = 0
-                separated = [s.strip() for s in separated]
-                while len(separated) > 1 and not separated[-1]:
-                    undone += 1
-                    separated.pop()
-                if op == '-' and undone % 2 != 0:
-                    right_expr = op + right_expr
-                elif op == '+':
-                    while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
-                        right_expr = separated.pop() + right_expr
-                    if separated[-1][-1:] in self.OP_CHARS:
-                        right_expr = separated.pop() + right_expr
-                # hanging op at end of left => unary + (strip) or - (push right)
-                left_val = separated[-1] if separated else ''
-                for dm_op in ('*', '%', '/', '**'):
-                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
-                    if len(bodmas) > 1 and not bodmas[-1].strip():
-                        expr = op.join(separated) + op + right_expr
-                        if len(separated) > 1:
-                            separated.pop()
-                            right_expr = op.join((left_val, right_expr))
-                        else:
-                            separated = [op.join((left_val, right_expr))]
-                            right_expr = None
-                        break
-                if right_expr is None:
-                    continue
-
-            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
-            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
+        op_result = self.handle_operators(expr, local_vars, allow_recursion)
+        if op_result:
+            return op_result[0], should_return
 
         if md.get('attribute'):
             variable, member, nullish = m.group('var', 'member', 'nullish')