From 26b6f15d14e30dc3349abe2933edb051b46be4b8 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 3 Feb 2025 18:10:15 +0000 Subject: [PATCH 1/4] [compat] Make casefold private * if required, not supported: `from youtube_dl.casefold import _casefold as casefold` --- youtube_dl/casefold.py | 12 ++++-------- youtube_dl/compat.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py index ad9c66f8e..712b2e7fa 100644 --- a/youtube_dl/casefold.py +++ b/youtube_dl/casefold.py @@ -10,9 +10,10 @@ from .compat import ( # https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt # In case newly foldable Unicode characters are defined, paste the new version # of the text inside the ''' marks. -# The text is expected to have only blank lines andlines with 1st character #, +# The text is expected to have only blank lines and lines with 1st character #, # all ignored, and fold definitions like this: -# `from_hex_code; space_separated_to_hex_code_list; comment` +# `from_hex_code; status; space_separated_to_hex_code_list; comment` +# Only `status` C/F are used. _map_str = ''' # CaseFolding-15.0.0.txt @@ -1657,11 +1658,6 @@ _map = dict( del _map_str -def casefold(s): +def _casefold(s): assert isinstance(s, compat_str) return ''.join((_map.get(c, c) for c in s)) - - -__all__ = [ - 'casefold', -] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index ed1a33cf2..c621f7476 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -44,7 +44,7 @@ try: compat_str.casefold compat_casefold = lambda s: s.casefold() except AttributeError: - from .casefold import casefold as compat_casefold + from .casefold import _casefold as compat_casefold try: import collections.abc as compat_collections_abc From 711e72c292327674c4a0593fdbb83d6347738ec9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Feb 2025 21:09:00 +0000 Subject: [PATCH 2/4] [JSInterp] Fix bit-shift coercion for player 9c6dfc4a --- test/test_jsinterp.py | 4 ++++ test/test_youtube_signature.py | 4 ++++ youtube_dl/compat.py | 5 +++++ youtube_dl/jsinterp.py | 27 +++++++++++++++++++++------ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 12e7b9b94..6c34bc896 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -459,6 +459,10 @@ class TestJSInterpreter(unittest.TestCase): self._test('function f(){return undefined >> 5}', 0) self._test('function f(){return 42 << NaN}', 42) self._test('function f(){return 42 << Infinity}', 42) + self._test('function f(){return 0.0 << null}', 0) + self._test('function f(){return NaN << 42}', 0) + self._test('function f(){return "21.9" << 1}', 42) + self._test('function f(){return 21 << 4294967297}', 42) def test_negative(self): self._test('function f(){return 2 * -2.0 ;}', -4) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index fcbc9d7a8..67ef75fde 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -219,6 +219,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', ), + ( + 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js', + 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg', + ), ] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index c621f7476..26b655fb6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -3116,17 +3116,21 @@ else: compat_kwargs = lambda kwargs: kwargs +# compat_numeric_types try: compat_numeric_types = (int, float, long, complex) except NameError: # Python 3 compat_numeric_types = (int, float, complex) +# compat_integer_types try: compat_integer_types = (int, long) except NameError: # Python 3 compat_integer_types = (int, ) +# compat_int +compat_int = compat_integer_types[-1] if sys.version_info < (2, 7): def compat_socket_create_connection(address, timeout, source_address=None): @@ -3532,6 +3536,7 @@ __all__ = [ 'compat_http_client', 'compat_http_server', 'compat_input', + 'compat_int', 'compat_integer_types', 'compat_itertools_count', 'compat_itertools_zip_longest', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 7835187f5..2859bc734 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -24,6 +24,8 @@ from .compat import ( compat_collections_chain_map as ChainMap, compat_contextlib_suppress, compat_filter as filter, + compat_int, + compat_integer_types, compat_itertools_zip_longest as zip_longest, compat_map as map, compat_numeric_types, @@ -70,14 +72,27 @@ class JS_Undefined(object): pass -def _js_bit_op(op): +def _js_bit_op(op, is_shift=False): - def zeroise(x): - return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x + def zeroise(x, is_shift_arg=False): + if isinstance(x, compat_integer_types): + return (x % 32) if is_shift_arg else (x & 0xffffffff) + try: + x = float(x) + if is_shift_arg: + x = int(x % 32) + elif x < 0: + x = -compat_int(-x % 0xffffffff) + else: + x = compat_int(x % 0xffffffff) + except (ValueError, TypeError): + # also here for int(NaN), including float('inf') % 32 + x = 0 + return x @wraps_op(op) def wrapped(a, b): - return op(zeroise(a), zeroise(b)) & 0xffffffff + return op(zeroise(a), zeroise(b, is_shift)) & 0xffffffff return wrapped @@ -253,8 +268,8 @@ def _js_typeof(expr): # avoid dict to maintain order # definition None => Defined in JSInterpreter._operator _OPERATORS = ( - ('>>', _js_bit_op(operator.rshift)), - ('<<', _js_bit_op(operator.lshift)), + ('>>', _js_bit_op(operator.rshift, True)), + ('<<', _js_bit_op(operator.lshift, True)), ('+', _js_add), ('-', _js_arith_op(operator.sub)), ('*', _js_arith_op(operator.mul)), From 91b1569f68471d685382b738806b2e07d8f52707 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 28 Feb 2025 00:02:10 +0000 Subject: [PATCH 3/4] [YouTube] Fix channel playlist extraction (#33074) * [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_... * resolves #33073 * thx seproDev (yt-dlp/yt-dlp#11615) Co-authored-by: sepro --- youtube_dl/extractor/youtube.py | 49 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c93a2a1f9..cc84a193a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + bug_reports_message, clean_html, dict_get, error_to_compat_str, @@ -65,6 +66,7 @@ from ..utils import ( url_or_none, urlencode_postdata, urljoin, + variadic, ) @@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'uploader': uploader, } + @staticmethod + def _extract_thumbnails(data, *path_list, **kw_final_key): + """ + Extract thumbnails from thumbnails dict + @param path_list: path list to level that contains 'thumbnails' key + """ + final_key = kw_final_key.get('final_key', 'thumbnails') + + return traverse_obj(data, (( + tuple(variadic(path) + (final_key, Ellipsis) + for path in path_list or [()])), { + 'url': ('url', T(url_or_none), + # Sometimes youtube gives a wrong thumbnail URL. See: + # https://github.com/yt-dlp/yt-dlp/issues/233 + # https://github.com/ytdl-org/youtube-dl/issues/28023 + T(lambda u: update_url(u, query=None) if u and 'maxresdefault' in u else u)), + 'height': ('height', T(int_or_none)), + 'width': ('width', T(int_or_none)), + }, T(lambda t: t if t.get('url') else None))) + def _search_results(self, query, params): data = { 'context': { @@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): expected_type=txt_or_none) def _grid_entries(self, grid_renderer): - for item in grid_renderer['items']: - if not isinstance(item, dict): + for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))): + lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict))) + if lockup_view_model: + entry = self._extract_lockup_view_model(lockup_view_model) + if entry: + yield entry continue renderer = self._extract_grid_item_renderer(item) if not isinstance(renderer, dict): @@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): continue yield self._extract_video(renderer) + def _extract_lockup_view_model(self, view_model): + content_id = view_model.get('contentId') + if not content_id: + return + content_type = view_model.get('contentType') + if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'): + self.report_warning( + 'Unsupported lockup view model content type "{0}"{1}'.format(content_type, bug_reports_message()), only_once=True) + return + return merge_dicts(self.url_result( + update_url_query('https://www.youtube.com/playlist', {'list': content_id}), + ie=YoutubeTabIE, video_id=content_id), { + 'title': traverse_obj(view_model, ( + 'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))), + 'thumbnails': self._extract_thumbnails(view_model, ( + 'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', + 'thumbnailViewModel', 'image'), final_key='sources'), + }) + def _video_entry(self, video_renderer): video_id = video_renderer.get('videoId') if video_id: From 673277e510ebd996b62a2fcc76169bf3cce29910 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 28 Feb 2025 01:02:20 +0000 Subject: [PATCH 4/4] [YouTube] Fix 91b1569 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cc84a193a..5f8c08201 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -3305,7 +3305,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return return merge_dicts(self.url_result( update_url_query('https://www.youtube.com/playlist', {'list': content_id}), - ie=YoutubeTabIE, video_id=content_id), { + ie=YoutubeTabIE.ie_key(), video_id=content_id), { 'title': traverse_obj(view_model, ( 'metadata', 'lockupMetadataViewModel', 'title', 'content', T(compat_str))), 'thumbnails': self._extract_thumbnails(view_model, (