Compare commits

..

No commits in common. "d1c6c5c4d618fa950813c0c71aede34a5ac851e9" and "211cbfd5d46025a8e4d8f9f3d424aaada4698974" have entirely different histories.

5 changed files with 64 additions and 155 deletions

View File

@ -18,7 +18,6 @@ class TestJSInterpreter(unittest.TestCase):
def test_basic(self): def test_basic(self):
jsi = JSInterpreter('function x(){;}') jsi = JSInterpreter('function x(){;}')
self.assertEqual(jsi.call_function('x'), None) self.assertEqual(jsi.call_function('x'), None)
self.assertEqual(repr(jsi.extract_function('x')), 'F<x>')
jsi = JSInterpreter('function x3(){return 42;}') jsi = JSInterpreter('function x3(){return 42;}')
self.assertEqual(jsi.call_function('x3'), 42) self.assertEqual(jsi.call_function('x3'), 42)
@ -506,16 +505,6 @@ class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('function x(){return 1236566549 << 5}') jsi = JSInterpreter('function x(){return 1236566549 << 5}')
self.assertEqual(jsi.call_function('x'), 915423904) self.assertEqual(jsi.call_function('x'), 915423904)
def test_bitwise_operators_madness(self):
jsi = JSInterpreter('function x(){return null << 5}')
self.assertEqual(jsi.call_function('x'), 0)
jsi = JSInterpreter('function x(){return undefined >> 5}')
self.assertEqual(jsi.call_function('x'), 0)
jsi = JSInterpreter('function x(){return 42 << NaN}')
self.assertEqual(jsi.call_function('x'), 42)
def test_32066(self): def test_32066(self):
jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
self.assertEqual(jsi.call_function('x'), 70) self.assertEqual(jsi.call_function('x'), 70)

View File

@ -25,7 +25,6 @@ import tokenize
import traceback import traceback
import random import random
from ssl import OPENSSL_VERSION
from string import ascii_letters from string import ascii_letters
from .compat import ( from .compat import (
@ -67,7 +66,6 @@ from .utils import (
HEADRequest, HEADRequest,
int_or_none, int_or_none,
ISO3166Utils, ISO3166Utils,
join_nonempty,
locked_file, locked_file,
LazyList, LazyList,
make_HTTPS_handler, make_HTTPS_handler,
@ -2397,20 +2395,9 @@ class YoutubeDL(object):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name return impl_name
def libc_ver(): self._write_string('[debug] Python version %s (%s) - %s\n' % (
try: platform.python_version(), python_implementation(),
return platform.libc_ver() platform_name()))
except OSError: # We may not have access to the executable
return []
self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % (
platform.python_version(),
python_implementation(),
platform.architecture()[0],
platform_name(),
OPENSSL_VERSION,
', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'),
))
exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['rtmpdump'] = rtmpdump_version()

View File

@ -31,7 +31,6 @@ from ..utils import (
extract_attributes, extract_attributes,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
LazyList, LazyList,
merge_dicts, merge_dicts,
@ -46,7 +45,6 @@ from ..utils import (
str_to_int, str_to_int,
traverse_obj, traverse_obj,
try_get, try_get,
txt_or_none,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
@ -2610,17 +2608,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'uploader_id': '@lexwill718', 'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 75, 'playlist_mincount': 75,
}, {
# Releases tab
'url': 'https://www.youtube.com/@daftpunk/releases',
'info_dict': {
'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
'title': 'Daft Punk - Releases',
'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
'uploader_id': '@daftpunk',
'uploader': 'Daft Punk',
},
'playlist_mincount': 36,
}, { }, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
@ -2835,12 +2822,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
return renderer return renderer
@staticmethod
def _get_text(r, k):
return traverse_obj(
r, (k, 'runs', 0, 'text'), (k, 'simpleText'),
expected_type=txt_or_none)
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in grid_renderer['items']:
if not isinstance(item, dict): if not isinstance(item, dict):
@ -2848,7 +2829,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
renderer = self._extract_grid_item_renderer(item) renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
continue continue
title = self._get_text(renderer, 'title') title = try_get(
renderer, (lambda x: x['title']['runs'][0]['text'],
lambda x: x['title']['simpleText']), compat_str)
# playlist # playlist
playlist_id = renderer.get('playlistId') playlist_id = renderer.get('playlistId')
if playlist_id: if playlist_id:
@ -2865,7 +2848,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# channel # channel
channel_id = renderer.get('channelId') channel_id = renderer.get('channelId')
if channel_id: if channel_id:
title = self._get_text(renderer, 'title') title = try_get(
renderer, lambda x: x['title']['simpleText'], compat_str)
yield self.url_result( yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id, 'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title) ie=YoutubeTabIE.ie_key(), video_title=title)
@ -2974,26 +2958,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _rich_grid_entries(self, contents): def _rich_grid_entries(self, contents):
for content in contents: for content in contents:
content = traverse_obj( video_renderer = try_get(
content, ('richItemRenderer', 'content'), content,
expected_type=dict) or {} (lambda x: x['richItemRenderer']['content']['videoRenderer'],
video_renderer = traverse_obj( lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
content, 'videoRenderer', 'reelItemRenderer', dict)
expected_type=dict)
if video_renderer: if video_renderer:
entry = self._video_entry(video_renderer) entry = self._video_entry(video_renderer)
if entry: if entry:
yield entry yield entry
# playlist
renderer = traverse_obj(
content, 'playlistRenderer', expected_type=dict) or {}
title = self._get_text(renderer, 'title')
playlist_id = renderer.get('playlistId')
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
@staticmethod @staticmethod
def _build_continuation_query(continuation, ctp=None): def _build_continuation_query(continuation, ctp=None):
@ -3098,7 +3071,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return return
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []): for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
yield entry yield entry
continuation = self._extract_continuation(rich_grid_renderer) continuation = self._extract_continuation(rich_grid_renderer)
ytcfg = self._extract_ytcfg(item_id, webpage) ytcfg = self._extract_ytcfg(item_id, webpage)
@ -3241,41 +3213,50 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
uploader['channel'] = uploader['uploader'] uploader['channel'] = uploader['uploader']
return uploader return uploader
@classmethod @staticmethod
def _extract_alert(cls, data): def _extract_alert(data):
alerts = [] alerts = []
for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict): for alert in try_get(data, lambda x: x['alerts'], list) or []:
alert_text = traverse_obj( if not isinstance(alert, dict):
alert, (None, lambda x: x['alertRenderer']['text']), get_all=False) continue
alert_text = try_get(
alert, lambda x: x['alertRenderer']['text'], dict)
if not alert_text: if not alert_text:
continue continue
text = cls._get_text(alert_text, 'text') text = try_get(
alert_text,
(lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
compat_str)
if text: if text:
alerts.append(text) alerts.append(text)
return '\n'.join(alerts) return '\n'.join(alerts)
def _extract_from_tabs(self, item_id, webpage, data, tabs): def _extract_from_tabs(self, item_id, webpage, data, tabs):
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), renderer = try_get(
expected_type=dict) or {} data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
playlist_id = item_id playlist_id = item_id
title = description = None title = description = None
if renderer: if renderer:
channel_title = txt_or_none(renderer.get('title')) or item_id channel_title = renderer.get('title') or item_id
tab_title = txt_or_none(selected_tab.get('title')) tab_title = selected_tab.get('title')
title = join_nonempty( title = channel_title or item_id
channel_title or item_id, tab_title, if tab_title:
txt_or_none(selected_tab.get('expandedText')), title += ' - %s' % tab_title
delim=' - ') if selected_tab.get('expandedText'):
description = txt_or_none(renderer.get('description')) title += ' - %s' % selected_tab['expandedText']
playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id description = renderer.get('description')
playlist_id = renderer.get('externalId')
else: else:
renderer = traverse_obj(data, renderer = try_get(
('metadata', 'playlistMetadataRenderer'), data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
('header', 'hashtagHeaderRenderer'), if renderer:
expected_type=dict) or {} title = renderer.get('title')
title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'), else:
expected_type=txt_or_none) renderer = try_get(
data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
if renderer:
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
playlist = self.playlist_result( playlist = self.playlist_result(
self._entries(selected_tab, item_id, webpage), self._entries(selected_tab, item_id, webpage),
playlist_id=playlist_id, playlist_title=title, playlist_id=playlist_id, playlist_title=title,
@ -3283,16 +3264,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return merge_dicts(playlist, self._extract_uploader(renderer, data)) return merge_dicts(playlist, self._extract_uploader(renderer, data))
def _extract_from_playlist(self, item_id, url, data, playlist): def _extract_from_playlist(self, item_id, url, data, playlist):
title = traverse_obj((playlist, data), title = playlist.get('title') or try_get(
(0, 'title'), (1, 'titleText', 'simpleText'), data, lambda x: x['titleText']['simpleText'], compat_str)
expected_type=txt_or_none) playlist_id = playlist.get('playlistId') or item_id
playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
# Inline playlist rendition continuation does not always work # Inline playlist rendition continuation does not always work
# at Youtube side, so delegating regular tab-based playlist URL # at Youtube side, so delegating regular tab-based playlist URL
# processing whenever possible. # processing whenever possible.
playlist_url = urljoin(url, traverse_obj( playlist_url = urljoin(url, try_get(
playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
expected_type=url_or_none)) compat_str))
if playlist_url and playlist_url != url: if playlist_url and playlist_url != url:
return self.url_result( return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

View File

@ -1,13 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from functools import update_wrapper
import itertools import itertools
import json import json
import math import math
import operator import operator
import re import re
from functools import update_wrapper
from .utils import ( from .utils import (
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
@ -25,22 +24,6 @@ from .compat import (
) )
# name JS functions
class function_with_repr(object):
# from yt_dlp/utils.py, but in this module
# repr_ is always set
def __init__(self, func, repr_):
update_wrapper(self, func)
self.func, self.__repr = func, repr_
def __call__(self, *args, **kwargs):
return self.func(*args, **kwargs)
def __repr__(self):
return self.__repr
# name JS operators
def wraps_op(op): def wraps_op(op):
def update_and_rename_wrapper(w): def update_and_rename_wrapper(w):
@ -52,13 +35,10 @@ def wraps_op(op):
return update_and_rename_wrapper return update_and_rename_wrapper
_NaN = float('nan')
def _js_bit_op(op): def _js_bit_op(op):
def zeroise(x): def zeroise(x):
return 0 if x in (None, JS_Undefined, _NaN) else x return 0 if x in (None, JS_Undefined) else x
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
@ -72,7 +52,7 @@ def _js_arith_op(op):
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
if JS_Undefined in (a, b): if JS_Undefined in (a, b):
return _NaN return float('nan')
return op(a or 0, b or 0) return op(a or 0, b or 0)
return wrapped return wrapped
@ -80,13 +60,13 @@ def _js_arith_op(op):
def _js_div(a, b): def _js_div(a, b):
if JS_Undefined in (a, b) or not (a and b): if JS_Undefined in (a, b) or not (a and b):
return _NaN return float('nan')
return operator.truediv(a or 0, b) if b else float('inf') return operator.truediv(a or 0, b) if b else float('inf')
def _js_mod(a, b): def _js_mod(a, b):
if JS_Undefined in (a, b) or not b: if JS_Undefined in (a, b) or not b:
return _NaN return float('nan')
return (a or 0) % b return (a or 0) % b
@ -94,7 +74,7 @@ def _js_exp(a, b):
if not b: if not b:
return 1 # even 0 ** 0 !! return 1 # even 0 ** 0 !!
elif JS_Undefined in (a, b): elif JS_Undefined in (a, b):
return _NaN return float('nan')
return (a or 0) ** b return (a or 0) ** b
@ -305,8 +285,6 @@ class JSInterpreter(object):
def _named_object(self, namespace, obj): def _named_object(self, namespace, obj):
self.__named_object_counter += 1 self.__named_object_counter += 1
name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter) name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
if callable(obj) and not isinstance(obj, function_with_repr):
obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, ))
namespace[name] = obj namespace[name] = obj
return name return name
@ -502,13 +480,6 @@ class JSInterpreter(object):
expr = self._dump(inner, local_vars) + outer expr = self._dump(inner, local_vars) + outer
if expr.startswith('('): if expr.startswith('('):
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
if m:
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
outer = None
inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars)
else:
inner, outer = self._separate_at_paren(expr) inner, outer = self._separate_at_paren(expr)
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
if not outer or should_abort: if not outer or should_abort:
@ -722,7 +693,7 @@ class JSInterpreter(object):
elif expr == 'undefined': elif expr == 'undefined':
return JS_Undefined, should_return return JS_Undefined, should_return
elif expr == 'NaN': elif expr == 'NaN':
return _NaN, should_return return float('NaN'), should_return
elif md.get('return'): elif md.get('return'):
return local_vars[m.group('name')], should_return return local_vars[m.group('name')], should_return
@ -964,17 +935,6 @@ class JSInterpreter(object):
return obj return obj
@staticmethod
def _offset_e_by_d(d, e, local_vars):
""" Short-cut eval: (d%e.length+e.length)%e.length """
try:
d = local_vars[d]
e = local_vars[e]
e = len(e)
return _js_mod(_js_mod(d, e) + e, e), False
except Exception:
return None, True
def extract_function_code(self, funcname): def extract_function_code(self, funcname):
""" @returns argnames, code """ """ @returns argnames, code """
func_m = re.search( func_m = re.search(
@ -993,9 +953,7 @@ class JSInterpreter(object):
return self.build_arglist(func_m.group('args')), code return self.build_arglist(func_m.group('args')), code
def extract_function(self, funcname): def extract_function(self, funcname):
return function_with_repr( return self.extract_function_from_code(*self.extract_function_code(funcname))
self.extract_function_from_code(*self.extract_function_code(funcname)),
'F<%s>' % (funcname, ))
def extract_function_from_code(self, argnames, code, *global_stack): def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {} local_vars = {}
@ -1030,6 +988,7 @@ class JSInterpreter(object):
def build_function(self, argnames, code, *global_stack): def build_function(self, argnames, code, *global_stack):
global_stack = list(global_stack) or [{}] global_stack = list(global_stack) or [{}]
argnames = tuple(argnames) argnames = tuple(argnames)
# import pdb; pdb.set_trace()
def resf(args, kwargs={}, allow_recursion=100): def resf(args, kwargs={}, allow_recursion=100):
global_stack[0].update( global_stack[0].update(

View File

@ -56,7 +56,6 @@ from .compat import (
compat_kwargs, compat_kwargs,
compat_os_name, compat_os_name,
compat_re_Match, compat_re_Match,
compat_re_Pattern,
compat_shlex_quote, compat_shlex_quote,
compat_str, compat_str,
compat_struct_pack, compat_struct_pack,
@ -87,7 +86,7 @@ def register_socks_protocols():
# Unfavoured alias # Unfavoured alias
compiled_regex_type = compat_re_Pattern compiled_regex_type = compat_re_Match
def random_user_agent(): def random_user_agent():
@ -3754,11 +3753,6 @@ def strip_or_none(v, default=None):
return v.strip() if isinstance(v, compat_str) else default return v.strip() if isinstance(v, compat_str) else default
def txt_or_none(v, default=None):
""" Combine str/strip_or_none, disallow blank value (for traverse_obj) """
return default if v is None else (compat_str(v).strip() or default)
def url_or_none(url): def url_or_none(url):
if not url or not isinstance(url, compat_str): if not url or not isinstance(url, compat_str):
return None return None
@ -4102,8 +4096,8 @@ def escape_url(url):
).geturl() ).geturl()
def parse_qs(url, **kwargs): def parse_qs(url):
return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs) return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
def read_batch_urls(batch_fd): def read_batch_urls(batch_fd):