From 45495228b7a6728b7e764bbcf1f38490cd3d8697 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 19 Mar 2023 00:51:44 +0000 Subject: [PATCH 01/88] [downloader/http] Only check for resumability when actually resuming --- test/test_downloader_http.py | 2 +- youtube_dl/downloader/http.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 4e6d7a2a0..6af86ae48 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -88,7 +88,7 @@ class TestHttpFD(unittest.TestCase): self.assertTrue(downloader.real_download(filename, { 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), })) - self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) try_rm(encodeFilename(filename)) def download_all(self, params): diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 440471aa0..28a49b9e8 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -141,7 +141,8 @@ class HttpFD(FileDownloader): # Content-Range is either not present or invalid. 
Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload - self.report_unable_to_resume() + if range_start > 0: + self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) From f35b757c826027ab5263d431bbe363c6403bd66d Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 19 Mar 2023 02:27:46 +0000 Subject: [PATCH 02/88] [utils] Ensure `allow_types` for `variadic()` is a tuple --- test/test_utils.py | 1 + youtube_dl/utils.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index ea2b96ed2..b85d397d0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1563,6 +1563,7 @@ Line 1 self.assertEqual(variadic(None), (None, )) self.assertEqual(variadic('spam'), ('spam', )) self.assertEqual(variadic('spam', allowed_types=dict), 'spam') + self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') def test_traverse_obj(self): _TEST_DATA = { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 761edcd49..f3c7af437 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4213,6 +4213,8 @@ def multipart_encode(data, boundary=None): def variadic(x, allowed_types=(compat_str, bytes, dict)): + if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable): + allowed_types = tuple(allowed_types) return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) From 88f28f620bcae7ba7302f8b049b74f0f8a12831f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 12 Mar 2023 14:46:09 +0530 Subject: [PATCH 03/88] [extractor/youtube] Construct fragment list lazily Ref: yt-dlp/yt-dlp/commit/e389d17 See: yt-dlp/yt-dlp#6517 --- youtube_dl/extractor/youtube.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py 
b/youtube_dl/extractor/youtube.py index 89711c84e..6b153193c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( get_element_by_attribute, int_or_none, js_to_json, + LazyList, merge_dicts, mimetype2ext, parse_codecs, @@ -1986,9 +1987,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itags = [] itag_qualities = {} q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) + CHUNK_SIZE = 10 << 20 + streaming_data = player_response.get('streamingData') or {} streaming_formats = streaming_data.get('formats') or [] streaming_formats.extend(streaming_data.get('adaptiveFormats') or []) + + def build_fragments(f): + return LazyList({ + 'url': update_url_query(f['url'], { + 'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize'])) + }) + } for range_start in range(0, f['filesize'], CHUNK_SIZE)) + for fmt in streaming_formats: if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): continue @@ -2048,15 +2059,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if no_video: dct['abr'] = tbr if no_audio or no_video: - CHUNK_SIZE = 10 << 20 # avoid Youtube throttling dct.update({ 'protocol': 'http_dash_segments', - 'fragments': [{ - 'url': update_url_query(dct['url'], { - 'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize'])) - }) - } for range_start in range(0, dct['filesize'], CHUNK_SIZE)] + 'fragments': build_fragments(dct), } if dct['filesize'] else { 'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful? 
}) From 3f6d2bd76f3393eef90896dfabc2d8dde37c2009 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 9 Mar 2023 22:09:23 +0530 Subject: [PATCH 04/88] [extractor/youtube] Bypass throttling for `-f17` and related cleanup Thanks @AudricV for the finding Ref: yt-dlp/yt-dlp/commit/c9abebb --- youtube_dl/extractor/youtube.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6b153193c..ae3416b20 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2052,13 +2052,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if mobj: dct['ext'] = mimetype2ext(mobj.group(1)) dct.update(parse_codecs(mobj.group(2))) - no_audio = dct.get('acodec') == 'none' - no_video = dct.get('vcodec') == 'none' - if no_audio: - dct['vbr'] = tbr - if no_video: - dct['abr'] = tbr - if no_audio or no_video: + single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec')) + if single_stream and dct.get('ext'): + dct['container'] = dct['ext'] + '_dash' + if single_stream or itag == '17': # avoid Youtube throttling dct.update({ 'protocol': 'http_dash_segments', @@ -2067,8 +2064,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful? 
}) - if dct.get('ext'): - dct['container'] = dct['ext'] + '_dash' formats.append(dct) hls_manifest_url = streaming_data.get('hlsManifestUrl') From cdf40b6aa651d949ce01e9bec1a11f792e8af899 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 3 Apr 2023 21:07:10 +0100 Subject: [PATCH 05/88] [test] Update tests for Ubuntu 20.04 * 18.04 test runner was withdrawn * for now, disable Py 3.3/3.4 tests --- .github/workflows/ci.yml | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a609f3704..51abdce1d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,10 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-18.04] + os: [ubuntu-20.04] # TODO: python 2.6 - python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + # TODO: restore support for 3.3, 3.4 + python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] python-impl: [cpython] ytdl-test-set: [core, download] run-tests-ext: [sh] @@ -26,26 +27,27 @@ jobs: ytdl-test-set: download run-tests-ext: bat # jython - - os: ubuntu-18.04 + - os: ubuntu-20.04 python-impl: jython ytdl-test-set: core run-tests-ext: sh - - os: ubuntu-18.04 + - os: ubuntu-20.04 python-impl: jython ytdl-test-set: download run-tests-ext: sh steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - if: ${{ matrix.python-impl == 'cpython' }} + - uses: actions/checkout@v3 + - name: Set up supported Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + if: ${{ matrix.python-impl == 'cpython' && ! 
contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }} with: python-version: ${{ matrix.python-version }} - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v2 with: java-version: 8 + distribution: 'zulu' - name: Install Jython if: ${{ matrix.python-impl == 'jython' }} run: | @@ -70,9 +72,9 @@ jobs: name: Linter runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install flake8 From 557dbac173c30a51acd284b46f2d5460e539f51a Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 5 Apr 2023 18:29:24 +0100 Subject: [PATCH 06/88] [FragmentFD] Fix iteration with infinite limit * fixes ytdl-org/youtube-dl/baa6c5e * resolves #31885 --- youtube_dl/downloader/dash.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cc30485f8..67a8e173f 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import itertools + from .fragment import FragmentFD from ..compat import compat_urllib_error from ..utils import ( @@ -30,15 +32,13 @@ class DashSegmentsFD(FragmentFD): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - frag_index = 0 - for i, fragment in enumerate(fragments): - frag_index += 1 + for frag_index, fragment in enumerate(fragments, 1): if frag_index <= ctx['fragment_index']: continue # In DASH, the first segment contains necessary headers to # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - for count in range(fragment_retries + 1): + fatal = frag_index == 1 or not skip_unavailable_fragments + for count in 
itertools.count(): try: fragment_url = fragment.get('url') if not fragment_url: @@ -48,7 +48,6 @@ class DashSegmentsFD(FragmentFD): if not success: return False self._append_fragment(ctx, frag_content) - break except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately @@ -58,13 +57,14 @@ class DashSegmentsFD(FragmentFD): # HTTP error. if count < fragment_retries: self.report_retry_fragment(err, frag_index, count + 1, fragment_retries) + continue except DownloadError: # Don't retry fragment if error occurred during HTTP downloading - # itself since it has own retry settings - if not fatal: - self.report_skip_fragment(frag_index) - break - raise + # itself since it has its own retry settings + if fatal: + raise + self.report_skip_fragment(frag_index) + break if count >= fragment_retries: if not fatal: From 78da22489b483988e198a8352893df9c6cf34032 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 5 Apr 2023 18:39:54 +0100 Subject: [PATCH 07/88] [compat] Add and use `compat_open()` like Py3 `open()` * resolves FIXME: ytdl-org/youtube-dl/commit/dfe5fa4 --- youtube_dl/compat.py | 11 +++++++++++ youtube_dl/options.py | 6 ++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 39551f810..fe62caf80 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -3127,6 +3127,16 @@ else: return ctypes.WINFUNCTYPE(*args, **kwargs) +if sys.version_info < (3, 0): + # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None + def compat_open(file_, *args, **kwargs): + if len(args) > 6 or 'opener' in kwargs: + raise ValueError('open: unsupported argument "opener"') + return io.open(file_, *args, **kwargs) +else: + compat_open = open + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3185,6 +3195,7 @@ __all__ = [ 'compat_kwargs', 
'compat_map', 'compat_numeric_types', + 'compat_open', 'compat_ord', 'compat_os_name', 'compat_os_path_expanduser', diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f6d2b0898..7b059b51e 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -11,6 +11,7 @@ from .compat import ( compat_get_terminal_size, compat_getenv, compat_kwargs, + compat_open as open, compat_shlex_split, ) from .utils import ( @@ -41,14 +42,11 @@ def _hide_login_info(opts): def parseOpts(overrideArguments=None): def _readOptions(filename_bytes, default=[]): try: - optionf = open(filename_bytes) + optionf = open(filename_bytes, encoding=preferredencoding()) except IOError: return default # silently skip if file is not present try: - # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - if sys.version_info < (3,): - contents = contents.decode(preferredencoding()) res = compat_shlex_split(contents, comments=True) finally: optionf.close() From 25124bd640acf2fbae71b2a52738ee41da548fb1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 5 Apr 2023 18:47:49 +0100 Subject: [PATCH 08/88] [devscripts] Improve hack to convert command-line options to API options * define equality for DateRange * don't show default DateRange --- devscripts/cli_to_api.py | 25 ++++++++++++++++++++++--- youtube_dl/utils.py | 4 ++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index 2f4d6a458..9fb1d2ba8 100755 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -49,15 +49,34 @@ def cli_to_api(*opts): # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900 default = parsed_options([]) - diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v) + + def neq_opt(a, b): + if a == b: + return False + if a is None and repr(type(object)).endswith(".utils.DateRange'>"): + return '0001-01-01 - 9999-12-31' != '{0}'.format(b) 
+ return a != b + + diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v)) if 'postprocessors' in diff: diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']] return diff def main(): - from pprint import pprint - pprint(cli_to_api(*sys.argv)) + from pprint import PrettyPrinter + + pprint = PrettyPrinter() + super_format = pprint.format + + def format(object, context, maxlevels, level): + if repr(type(object)).endswith(".utils.DateRange'>"): + return '{0}: {1}>'.format(repr(object)[:-2], object), True, False + return super_format(object, context, maxlevels, level) + + pprint.format = format + + pprint.pprint(cli_to_api(*sys.argv)) if __name__ == '__main__': diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f3c7af437..d80ceb007 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3190,6 +3190,10 @@ class DateRange(object): def __str__(self): return '%s - %s' % (self.start.isoformat(), self.end.isoformat()) + def __eq__(self, other): + return (isinstance(other, DateRange) + and self.start == other.start and self.end == other.end) + def platform_name(): """ Returns the platform name as a compat_str """ From 9f4d83ff4255d8840c0fa9b367722c129ebecdb2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 5 Apr 2023 18:50:25 +0100 Subject: [PATCH 09/88] [options] Add --mtime option, unsets default --no-mtime * resolves #1709 (!) 
--- youtube_dl/options.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 7b059b51e..d802b7e59 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -731,9 +731,13 @@ def parseOpts(overrideArguments=None): '--no-part', action='store_true', dest='nopart', default=False, help='Do not use .part files - write directly into output file') + filesystem.add_option( + '--mtime', + action='store_true', dest='updatetime', default=True, + help='Use the Last-modified header to set the file modification time (default)') filesystem.add_option( '--no-mtime', - action='store_false', dest='updatetime', default=True, + action='store_false', dest='updatetime', help='Do not use the Last-modified header to set the file modification time') filesystem.add_option( '--write-description', From d6ae3b77cd50083ef245c28f904ee0b70a77d5c6 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Apr 2023 14:11:18 +0100 Subject: [PATCH 10/88] [core] Avoid deepcopy of ctx dict (fix f35b757) * may now contain `LazyList`s * resolves #31999 --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bcf781744..2c0d4926c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1399,7 +1399,7 @@ class YoutubeDL(object): filters = [self._build_format_filter(f) for f in selector.filters] def final_selector(ctx): - ctx_copy = copy.deepcopy(ctx) + ctx_copy = dict(ctx) for _filter in filters: ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) return selector_function(ctx_copy) From f8253a528935f78e1a3b724db8c1f0089f99314a Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Apr 2023 19:42:36 +0100 Subject: [PATCH 11/88] [core] Avoid deepcopy of ctx dict (fix f35b757) (Pt 2) --- youtube_dl/YoutubeDL.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 2c0d4926c..927b19417 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1389,11 +1389,10 @@ class YoutubeDL(object): 'abr': formats_info[1].get('abr'), 'ext': output_ext, } - video_selector, audio_selector = map(_build_selector_function, selector.selector) def selector_function(ctx): - for pair in itertools.product( - video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): + selector_fn = lambda x: _build_selector_function(x)(ctx) + for pair in itertools.product(*map(selector_fn, selector.selector)): yield _merge(pair) filters = [self._build_format_filter(f) for f in selector.filters] From 213d1d91bfc4a00fefc72fa2730555d51060b42d Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Apr 2023 19:49:46 +0100 Subject: [PATCH 12/88] [core] No longer importing copy --- youtube_dl/YoutubeDL.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 927b19417..2a1e59bf8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -5,7 +5,6 @@ from __future__ import absolute_import, unicode_literals import collections import contextlib -import copy import datetime import errno import fileinput From fe7e13066c20b10fe48bc154431440da36baec53 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 10 Apr 2023 17:12:31 +0100 Subject: [PATCH 13/88] [core] Add and use sanitize_info() method from yt-dlp --- youtube_dl/YoutubeDL.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2a1e59bf8..2719d546f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -30,9 +30,12 @@ from string import ascii_letters from .compat import ( compat_basestring, compat_cookiejar, + compat_filter as filter, compat_get_terminal_size, compat_http_client, + compat_integer_types, compat_kwargs, + compat_map as map, compat_numeric_types, compat_os_name, compat_str, @@ 
-64,6 +67,7 @@ from .utils import ( int_or_none, ISO3166Utils, locked_file, + LazyList, make_HTTPS_handler, MaxDownloadsReached, orderedSet, @@ -2109,10 +2113,36 @@ class YoutubeDL(object): return self._download_retcode @staticmethod - def filter_requested_info(info_dict): - return dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) + def sanitize_info(info_dict, remove_private_keys=False): + ''' Sanitize the infodict for converting to json ''' + if info_dict is None: + return info_dict + + if remove_private_keys: + reject = lambda k, v: (v is None + or k.startswith('__') + or k in ('requested_formats', + 'requested_subtitles')) + else: + reject = lambda k, v: False + + def filter_fn(obj): + if isinstance(obj, dict): + return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)) + elif isinstance(obj, (list, tuple, set, LazyList)): + return list(map(filter_fn, obj)) + elif obj is None or any(isinstance(obj, c) + for c in (compat_integer_types, + (compat_str, float, bool))): + return obj + else: + return repr(obj) + + return filter_fn(info_dict) + + @classmethod + def filter_requested_info(cls, info_dict): + return cls.sanitize_info(info_dict, True) def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" From 735e87adfc44b284dcdb4d9a0155ce0616e3af97 Mon Sep 17 00:00:00 2001 From: Gabriel Nagy Date: Thu, 13 Apr 2023 01:40:38 +0300 Subject: [PATCH 14/88] [core] Sanitize info dict before dumping JSON (fixes fe7e130) (#32032) * follow up to fe7e130 which didn't fix everything. 
Co-authored-by: dirkf --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2719d546f..117f1c513 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1777,7 +1777,7 @@ class YoutubeDL(object): self.to_stdout(formatSeconds(info_dict['duration'])) print_mandatory('format') if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -2091,7 +2091,7 @@ class YoutubeDL(object): raise else: if self.params.get('dump_single_json', False): - self.to_stdout(json.dumps(res)) + self.to_stdout(json.dumps(self.sanitize_info(res))) return self._download_retcode @@ -2100,6 +2100,7 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load + # TODO: let's use io.open(), then info = self.filter_requested_info(json.loads('\n'.join(f))) try: self.process_ie_result(info, download=True) From 2da3fa04a68ff0652f49d6874d82b7a0edb85ea3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 11 Apr 2023 17:36:27 +0100 Subject: [PATCH 15/88] [YouTube] Simplify signature patterns --- youtube_dl/extractor/youtube.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ae3416b20..80fff7ada 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -19,6 +19,7 @@ from ..compat import ( compat_urllib_parse_parse_qs as compat_parse_qs, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, + compat_zip as zip, ) from ..jsinterp import JSInterpreter from ..utils import ( @@ -1555,17 +1556,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\bm=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', r'\bc&&\(c=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns - r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r'("|\')signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From 26035bde46c0acc30dc053618451d9aeca4b7709 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 13 Apr 2023 00:15:07 +0100 Subject: [PATCH 16/88] [DashSegmentsFD] Correctly detect errors when `fragment_retries` == 0 * use the success flag instead of the retry count * establish the fragment_url outside the retry loop * only report skipping a fragment once. 
* resolves #32033 --- youtube_dl/downloader/dash.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 67a8e173f..2800d4260 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,12 +38,13 @@ class DashSegmentsFD(FragmentFD): # In DASH, the first segment contains necessary headers to # generate a valid MP4 file, so always abort for the first segment fatal = frag_index == 1 or not skip_unavailable_fragments + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success = False for count in itertools.count(): try: - fragment_url = fragment.get('url') - if not fragment_url: - assert fragment_base_url - fragment_url = urljoin(fragment_base_url, fragment['path']) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) if not success: return False @@ -63,14 +64,13 @@ class DashSegmentsFD(FragmentFD): # itself since it has its own retry settings if fatal: raise - self.report_skip_fragment(frag_index) break - if count >= fragment_retries: + if not success: if not fatal: self.report_skip_fragment(frag_index) continue - self.report_error('giving up after %s fragment retries' % fragment_retries) + self.report_error('giving up after %s fragment retries' % count) return False self._finish_frag_download(ctx) From 211cbfd5d46025a8e4d8f9f3d424aaada4698974 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 21 Apr 2023 14:04:30 +0100 Subject: [PATCH 17/88] [jsinterp] Minimally handle arithmetic operator precedence Resolves #32066 --- test/test_jsinterp.py | 11 +++++++++++ youtube_dl/jsinterp.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 5d129433d..e121358d7 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ 
-505,6 +505,17 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x(){return 1236566549 << 5}') self.assertEqual(jsi.call_function('x'), 915423904) + def test_32066(self): + jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") + self.assertEqual(jsi.call_function('x'), 70) + + def test_unary_operators(self): + jsi = JSInterpreter('function f(){return 2 - - - 2;}') + self.assertEqual(jsi.call_function('f'), 0) + # fails + # jsi = JSInterpreter('function f(){return 2 + - + - - 2;}') + # self.assertEqual(jsi.call_function('f'), 0) + """ # fails so far def test_packed(self): jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index ab7d6f926..a06fc4ff5 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +from functools import update_wrapper import itertools import json import math @@ -23,11 +24,23 @@ from .compat import ( ) +def wraps_op(op): + + def update_and_rename_wrapper(w): + f = update_wrapper(w, op) + # fn names are str in both Py 2/3 + f.__name__ = str('JS_') + f.__name__ + return f + + return update_and_rename_wrapper + + def _js_bit_op(op): def zeroise(x): return 0 if x in (None, JS_Undefined) else x + @wraps_op(op) def wrapped(a, b): return op(zeroise(a), zeroise(b)) & 0xffffffff @@ -36,6 +49,7 @@ def _js_bit_op(op): def _js_arith_op(op): + @wraps_op(op) def wrapped(a, b): if JS_Undefined in (a, b): return float('nan') @@ -66,6 +80,7 @@ def _js_exp(a, b): def _js_eq_op(op): + @wraps_op(op) def wrapped(a, b): if set((a, b)) <= set((None, JS_Undefined)): return op(a, a) @@ -76,6 +91,7 @@ def _js_eq_op(op): def _js_comp_op(op): + @wraps_op(op) def wrapped(a, b): if JS_Undefined in (a, b): return False @@ -356,6 +372,7 @@ class JSInterpreter(object): 
return right_val try: + # print('Eval:', opfunc.__name__, left_val, right_val) return opfunc(left_val, right_val) except Exception as e: raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) @@ -395,6 +412,7 @@ class JSInterpreter(object): raise self.Exception('Recursion limit reached') allow_recursion -= 1 + # print('At: ' + stmt[:60]) should_return = False # fails on (eg) if (...) stmt1; else stmt2; sub_statements = list(self._separate(stmt, ';')) or [''] @@ -702,9 +720,24 @@ class JSInterpreter(object): continue right_expr = separated.pop() - while op == '-' and len(separated) > 1 and not separated[-1].strip(): - right_expr = '-' + right_expr - separated.pop() + # handle operators that are both unary and binary, minimal BODMAS + if op in ('+', '-'): + undone = 0 + while len(separated) > 1 and not separated[-1].strip(): + undone += 1 + separated.pop() + if op == '-' and undone % 2 != 0: + right_expr = op + right_expr + left_val = separated[-1] + for dm_op in ('*', '%', '/', '**'): + bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) + if len(bodmas) > 1 and not bodmas[-1].strip(): + expr = op.join(separated) + op + right_expr + right_expr = None + break + if right_expr is None: + continue + left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return @@ -955,6 +988,7 @@ class JSInterpreter(object): def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] argnames = tuple(argnames) + # import pdb; pdb.set_trace() def resf(args, kwargs={}, allow_recursion=100): global_stack[0].update( From 64d6dd64c8b7a35a87655d27fc83f2e98ef6ce13 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 23 Apr 2023 22:58:35 +0100 Subject: [PATCH 18/88] [YouTube] Support Releases tab --- youtube_dl/extractor/youtube.py | 114 
+++++++++++++++++++------------- youtube_dl/utils.py | 9 ++- 2 files changed, 74 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 80fff7ada..0411c49f1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( extract_attributes, get_element_by_attribute, int_or_none, + join_nonempty, js_to_json, LazyList, merge_dicts, @@ -45,6 +46,7 @@ from ..utils import ( str_to_int, traverse_obj, try_get, + txt_or_none, unescapeHTML, unified_strdate, unsmuggle_url, @@ -2608,6 +2610,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lexwill718', }, 'playlist_mincount': 75, + }, { + # Releases tab + 'url': 'https://www.youtube.com/@daftpunk/releases', + 'info_dict': { + 'id': 'UC_kRDKYrUlrbtrSiyu5Tflg', + 'title': 'Daft Punk - Releases', + 'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel', + 'uploader_id': '@daftpunk', + 'uploader': 'Daft Punk', + }, + 'playlist_mincount': 36, }, { 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'only_matching': True, @@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): continue return renderer + @staticmethod + def _get_text(r, k): + return traverse_obj( + r, (k, 'runs', 0, 'text'), (k, 'simpleText'), + expected_type=txt_or_none) + def _grid_entries(self, grid_renderer): for item in grid_renderer['items']: if not isinstance(item, dict): @@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): renderer = self._extract_grid_item_renderer(item) if not isinstance(renderer, dict): continue - title = try_get( - renderer, (lambda x: x['title']['runs'][0]['text'], - lambda x: x['title']['simpleText']), compat_str) + title = self._get_text(renderer, 'title') # playlist playlist_id = renderer.get('playlistId') if playlist_id: @@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # channel channel_id = renderer.get('channelId') if 
channel_id: - title = try_get( - renderer, lambda x: x['title']['simpleText'], compat_str) + title = self._get_text(renderer, 'title') yield self.url_result( 'https://www.youtube.com/channel/%s' % channel_id, ie=YoutubeTabIE.ie_key(), video_title=title) @@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): def _rich_grid_entries(self, contents): for content in contents: - video_renderer = try_get( - content, - (lambda x: x['richItemRenderer']['content']['videoRenderer'], - lambda x: x['richItemRenderer']['content']['reelItemRenderer']), - dict) + content = traverse_obj( + content, ('richItemRenderer', 'content'), + expected_type=dict) or {} + video_renderer = traverse_obj( + content, 'videoRenderer', 'reelItemRenderer', + expected_type=dict) if video_renderer: entry = self._video_entry(video_renderer) if entry: yield entry + # playlist + renderer = traverse_obj( + content, 'playlistRenderer', expected_type=dict) or {} + title = self._get_text(renderer, 'title') + playlist_id = renderer.get('playlistId') + if playlist_id: + yield self.url_result( + 'https://www.youtube.com/playlist?list=%s' % playlist_id, + ie=YoutubeTabIE.ie_key(), video_id=playlist_id, + video_title=title) @staticmethod def _build_continuation_query(continuation, ctp=None): @@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []): yield entry + continuation = self._extract_continuation(rich_grid_renderer) ytcfg = self._extract_ytcfg(item_id, webpage) @@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): uploader['channel'] = uploader['uploader'] return uploader - @staticmethod - def _extract_alert(data): + @classmethod + def _extract_alert(cls, data): alerts = [] - for alert in try_get(data, lambda x: x['alerts'], list) or []: - if not isinstance(alert, dict): - continue - alert_text = try_get( - alert, lambda x: x['alertRenderer']['text'], dict) + for alert in 
traverse_obj(data, ('alerts', Ellipsis), expected_type=dict): + alert_text = traverse_obj( + alert, (None, lambda x: x['alertRenderer']['text']), get_all=False) if not alert_text: continue - text = try_get( - alert_text, - (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']), - compat_str) + text = cls._get_text(alert_text, 'text') if text: alerts.append(text) return '\n'.join(alerts) def _extract_from_tabs(self, item_id, webpage, data, tabs): selected_tab = self._extract_selected_tab(tabs) - renderer = try_get( - data, lambda x: x['metadata']['channelMetadataRenderer'], dict) + renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), + expected_type=dict) or {} playlist_id = item_id title = description = None if renderer: - channel_title = renderer.get('title') or item_id - tab_title = selected_tab.get('title') - title = channel_title or item_id - if tab_title: - title += ' - %s' % tab_title - if selected_tab.get('expandedText'): - title += ' - %s' % selected_tab['expandedText'] - description = renderer.get('description') - playlist_id = renderer.get('externalId') + channel_title = txt_or_none(renderer.get('title')) or item_id + tab_title = txt_or_none(selected_tab.get('title')) + title = join_nonempty( + channel_title or item_id, tab_title, + txt_or_none(selected_tab.get('expandedText')), + delim=' - ') + description = txt_or_none(renderer.get('description')) + playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id else: - renderer = try_get( - data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) - if renderer: - title = renderer.get('title') - else: - renderer = try_get( - data, lambda x: x['header']['hashtagHeaderRenderer'], dict) - if renderer: - title = try_get(renderer, lambda x: x['hashtag']['simpleText']) + renderer = traverse_obj(data, + ('metadata', 'playlistMetadataRenderer'), + ('header', 'hashtagHeaderRenderer'), + expected_type=dict) or {} + title = traverse_obj(renderer, 'title', ('hashtag', 
'simpleText'), + expected_type=txt_or_none) playlist = self.playlist_result( self._entries(selected_tab, item_id, webpage), playlist_id=playlist_id, playlist_title=title, @@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return merge_dicts(playlist, self._extract_uploader(renderer, data)) def _extract_from_playlist(self, item_id, url, data, playlist): - title = playlist.get('title') or try_get( - data, lambda x: x['titleText']['simpleText'], compat_str) - playlist_id = playlist.get('playlistId') or item_id + title = traverse_obj((playlist, data), + (0, 'title'), (1, 'titleText', 'simpleText'), + expected_type=txt_or_none) + playlist_id = txt_or_none(playlist.get('playlistId')) or item_id # Inline playlist rendition continuation does not always work # at Youtube side, so delegating regular tab-based playlist URL # processing whenever possible. - playlist_url = urljoin(url, try_get( - playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], - compat_str)) + playlist_url = urljoin(url, traverse_obj( + playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), + expected_type=url_or_none)) if playlist_url and playlist_url != url: return self.url_result( playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d80ceb007..65ddb3b0f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3753,6 +3753,11 @@ def strip_or_none(v, default=None): return v.strip() if isinstance(v, compat_str) else default +def txt_or_none(v, default=None): + """ Combine str/strip_or_none, disallow blank value (for traverse_obj) """ + return default if v is None else (compat_str(v).strip() or default) + + def url_or_none(url): if not url or not isinstance(url, compat_str): return None @@ -4096,8 +4101,8 @@ def escape_url(url): ).geturl() -def parse_qs(url): - return compat_parse_qs(compat_urllib_parse.urlparse(url).query) +def parse_qs(url, **kwargs): + return 
compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs) def read_batch_urls(batch_fd): From 11cc3f3ad03a88d6cb1eab18a8e5dd6bf148ac54 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 May 2023 20:53:07 +0100 Subject: [PATCH 19/88] [utils] Fix `compiled_regex_type` in 249f2b6 --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 65ddb3b0f..584581b6a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -56,6 +56,7 @@ from .compat import ( compat_kwargs, compat_os_name, compat_re_Match, + compat_re_Pattern, compat_shlex_quote, compat_str, compat_struct_pack, @@ -86,7 +87,7 @@ def register_socks_protocols(): # Unfavoured alias -compiled_regex_type = compat_re_Match +compiled_regex_type = compat_re_Pattern def random_user_agent(): From a85a875fef2e9b097c3f6f93f1d0cead06f84e43 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 May 2023 20:59:30 +0100 Subject: [PATCH 20/88] [jsinterp] Handle NaN in bitwise operators * also add _NaN * also pull function naming from yt-dlp --- test/test_jsinterp.py | 11 +++++++++++ youtube_dl/jsinterp.py | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index e121358d7..a8f312fde 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -18,6 +18,7 @@ class TestJSInterpreter(unittest.TestCase): def test_basic(self): jsi = JSInterpreter('function x(){;}') self.assertEqual(jsi.call_function('x'), None) + self.assertEqual(repr(jsi.extract_function('x')), 'F') jsi = JSInterpreter('function x3(){return 42;}') self.assertEqual(jsi.call_function('x3'), 42) @@ -505,6 +506,16 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x(){return 1236566549 << 5}') self.assertEqual(jsi.call_function('x'), 915423904) + def test_bitwise_operators_madness(self): + jsi = JSInterpreter('function x(){return null << 5}') + 
self.assertEqual(jsi.call_function('x'), 0) + + jsi = JSInterpreter('function x(){return undefined >> 5}') + self.assertEqual(jsi.call_function('x'), 0) + + jsi = JSInterpreter('function x(){return 42 << NaN}') + self.assertEqual(jsi.call_function('x'), 42) + def test_32066(self): jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") self.assertEqual(jsi.call_function('x'), 70) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a06fc4ff5..bb406647a 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals -from functools import update_wrapper import itertools import json import math import operator import re +from functools import update_wrapper + from .utils import ( error_to_compat_str, ExtractorError, @@ -24,6 +25,22 @@ from .compat import ( ) +# name JS functions +class function_with_repr(object): + # from yt_dlp/utils.py, but in this module + # repr_ is always set + def __init__(self, func, repr_): + update_wrapper(self, func) + self.func, self.__repr = func, repr_ + + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) + + def __repr__(self): + return self.__repr + + +# name JS operators def wraps_op(op): def update_and_rename_wrapper(w): @@ -35,10 +52,13 @@ def wraps_op(op): return update_and_rename_wrapper +_NaN = float('nan') + + def _js_bit_op(op): def zeroise(x): - return 0 if x in (None, JS_Undefined) else x + return 0 if x in (None, JS_Undefined, _NaN) else x @wraps_op(op) def wrapped(a, b): @@ -52,7 +72,7 @@ def _js_arith_op(op): @wraps_op(op) def wrapped(a, b): if JS_Undefined in (a, b): - return float('nan') + return _NaN return op(a or 0, b or 0) return wrapped @@ -60,13 +80,13 @@ def _js_arith_op(op): def _js_div(a, b): if JS_Undefined in (a, b) or not (a and b): - return float('nan') + return _NaN return operator.truediv(a or 0, b) if b else float('inf') def _js_mod(a, b): 
if JS_Undefined in (a, b) or not b: - return float('nan') + return _NaN return (a or 0) % b @@ -74,7 +94,7 @@ def _js_exp(a, b): if not b: return 1 # even 0 ** 0 !! elif JS_Undefined in (a, b): - return float('nan') + return _NaN return (a or 0) ** b @@ -285,6 +305,8 @@ class JSInterpreter(object): def _named_object(self, namespace, obj): self.__named_object_counter += 1 name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter) + if callable(obj) and not isinstance(obj, function_with_repr): + obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, )) namespace[name] = obj return name @@ -693,7 +715,7 @@ class JSInterpreter(object): elif expr == 'undefined': return JS_Undefined, should_return elif expr == 'NaN': - return float('NaN'), should_return + return _NaN, should_return elif md.get('return'): return local_vars[m.group('name')], should_return @@ -953,7 +975,9 @@ class JSInterpreter(object): return self.build_arglist(func_m.group('args')), code def extract_function(self, funcname): - return self.extract_function_from_code(*self.extract_function_code(funcname)) + return function_with_repr( + self.extract_function_from_code(*self.extract_function_code(funcname)), + 'F<%s>' % (funcname, )) def extract_function_from_code(self, argnames, code, *global_stack): local_vars = {} @@ -988,7 +1012,6 @@ class JSInterpreter(object): def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] argnames = tuple(argnames) - # import pdb; pdb.set_trace() def resf(args, kwargs={}, allow_recursion=100): global_stack[0].update( From 6ed34338285f722d0da312ce0af3a15a077a3e2a Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 May 2023 21:02:01 +0100 Subject: [PATCH 21/88] [jsinterp] Add short-cut evaluation for common expression * special handling for (d%e.length+e.length)%e.length speeds up ~6% --- youtube_dl/jsinterp.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index bb406647a..f837865c4 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -502,8 +502,15 @@ class JSInterpreter(object): expr = self._dump(inner, local_vars) + outer if expr.startswith('('): - inner, outer = self._separate_at_paren(expr) - inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) + + m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) + if m: + # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` + outer = None + inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars) + else: + inner, outer = self._separate_at_paren(expr) + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) if not outer or should_abort: return inner, should_abort or should_return else: @@ -957,6 +964,17 @@ class JSInterpreter(object): return obj + @staticmethod + def _offset_e_by_d(d, e, local_vars): + """ Short-cut eval: (d%e.length+e.length)%e.length """ + try: + d = local_vars[d] + e = local_vars[e] + e = len(e) + return _js_mod(_js_mod(d, e) + e, e), False + except Exception: + return None, True + def extract_function_code(self, funcname): """ @returns argnames, code """ func_m = re.search( From d1c6c5c4d618fa950813c0c71aede34a5ac851e9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 11 May 2023 21:17:31 +0100 Subject: [PATCH 22/88] [core] Improve platform debug log, based on yt-dlp --- youtube_dl/YoutubeDL.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 117f1c513..212c04298 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -25,6 +25,7 @@ import tokenize import traceback import random +from ssl import OPENSSL_VERSION from string import ascii_letters from .compat import ( @@ -66,6 +67,7 @@ from .utils import ( HEADRequest,
int_or_none, ISO3166Utils, + join_nonempty, locked_file, LazyList, make_HTTPS_handler, @@ -2395,9 +2397,20 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - self._write_string('[debug] Python version %s (%s) - %s\n' % ( - platform.python_version(), python_implementation(), - platform_name())) + def libc_ver(): + try: + return platform.libc_ver() + except OSError: # We may not have access to the executable + return [] + + self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % ( + platform.python_version(), + python_implementation(), + platform.architecture()[0], + platform_name(), + OPENSSL_VERSION, + ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'), + )) exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() From d89c2137ba4c1def185358a9ff48642e05ac65a2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 19 May 2023 13:09:18 +0100 Subject: [PATCH 23/88] [jsinterp] Small updates for a85a875 * update signature tests * clarify NaN handling --- test/test_jsinterp.py | 3 +++ test/test_youtube_signature.py | 8 ++++++++ youtube_dl/jsinterp.py | 12 +++++------- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index a8f312fde..1cc148b15 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -516,6 +516,9 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x(){return 42 << NaN}') self.assertEqual(jsi.call_function('x'), 42) + jsi = JSInterpreter('function x(){return 42 << Infinity}') + self.assertEqual(jsi.call_function('x'), 42) + def test_32066(self): jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") self.assertEqual(jsi.call_function('x'), 70) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index decf7ee38..d41d708a0 100644 --- 
a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -143,6 +143,14 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', ), + ( + 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js', + 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw', + ), + ( + 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', + 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index f837865c4..dc580943e 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import itertools import json -import math import operator import re @@ -52,6 +51,10 @@ def wraps_op(op): return update_and_rename_wrapper +# NB In principle NaN cannot be checked by membership. +# Here all NaN values are actually this one, so _NaN is _NaN, +# although _NaN != _NaN. + _NaN = float('nan') @@ -126,13 +129,8 @@ def _js_comp_op(op): def _js_ternary(cndn, if_true=True, if_false=False): """Simulate JS's ternary operator (cndn?if_true:if_false)""" - if cndn in (False, None, 0, '', JS_Undefined): + if cndn in (False, None, 0, '', JS_Undefined, _NaN): return if_false - try: - if math.isnan(cndn): # NB: NaN cannot be checked by membership - return if_false - except TypeError: - pass return if_true From 1f7c6f8b2ba5bedc9b4da279659688fbbf06a059 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 19 May 2023 13:12:59 +0100 Subject: [PATCH 24/88] [core] Further improve platform debug log * see d1c6c5c --- youtube_dl/YoutubeDL.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 212c04298..1b3ef94b4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -102,6 +102,7 @@ from .utils import ( YoutubeDLCookieProcessor, YoutubeDLHandler, YoutubeDLRedirectHandler, + 
ytdl_is_updateable, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER @@ -2373,9 +2374,11 @@ class YoutubeDL(object): self.get_encoding())) write_string(encoding_str, encoding=None) - self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) + + writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else '')) if _LAZY_LOADER: - self._write_string('[debug] Lazy loading extractors enabled' + '\n') + writeln_debug('Lazy loading extractors enabled') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -2384,7 +2387,7 @@ class YoutubeDL(object): out, err = process_communicate_or_kill(sp) out = out.decode().strip() if re.match('[0-9a-f]+', out): - self._write_string('[debug] Git HEAD: ' + out + '\n') + writeln_debug('Git HEAD: ', out) except Exception: try: sys.exc_clear() @@ -2403,13 +2406,15 @@ class YoutubeDL(object): except OSError: # We may not have access to the executable return [] - self._write_string('[debug] Python %s (%s %s) - %s (%s%s)\n' % ( + libc = join_nonempty(*libc_ver(), delim=' ') + writeln_debug('Python %s (%s %s %s) - %s - %s%s' % ( platform.python_version(), python_implementation(), + platform.machine(), platform.architecture()[0], platform_name(), OPENSSL_VERSION, - ', %s' % (join_nonempty(*libc_ver(), delim=' ') or '-'), + (' - %s' % (libc, )) if libc else '' )) exe_versions = FFmpegPostProcessor.get_versions(self) @@ -2422,17 +2427,17 @@ class YoutubeDL(object): ) if not exe_str: exe_str = 'none' - self._write_string('[debug] exe versions: %s\n' % exe_str) + writeln_debug('exe versions: %s' % (exe_str, )) proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + writeln_debug('Proxy map: ', 
compat_str(proxy_map)) if self.params.get('call_home', False): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - self._write_string('[debug] Public IP address: %s\n' % ipaddr) + writeln_debug('Public IP address: %s' % (ipaddr, )) latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') if version_tuple(latest_version) > version_tuple(__version__): From ee731f3d00064f446faa9ffb4c21ce4ca388bf5d Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 23 May 2023 16:19:55 +0100 Subject: [PATCH 25/88] [ITV] Fix UA capitalisation in 384f632 --- youtube_dl/extractor/itv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 7026139ea..c64af3be6 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -59,7 +59,7 @@ class ITVBaseIE(InfoExtractor): @staticmethod def _vanilla_ua_header(): - return {'User-agent': 'Mozilla/5.0'} + return {'User-Agent': 'Mozilla/5.0'} def _download_webpage_handle(self, url, video_id, *args, **kwargs): # specialised to (a) use vanilla UA (b) detect geo-block @@ -69,7 +69,7 @@ class ITVBaseIE(InfoExtractor): 'user_agent' not in params and not any(re.match(r'(?i)user-agent\s*:', h) for h in (params.get('headers') or [])) - and 'User-agent' not in (kwargs.get('headers') or {})): + and 'User-Agent' not in (kwargs.get('headers') or {})): kwargs.setdefault('headers', {}) kwargs['headers'] = self._vanilla_ua_header() From 2389c7cbd30813435c50848a9b276bcfe2a810db Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 23 May 2023 17:11:22 +0100 Subject: [PATCH 26/88] [compat] Fix casefold import __all__ syntax in a19855f --- youtube_dl/casefold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py index 748c2d491..ad9c66f8e 100644 --- a/youtube_dl/casefold.py +++ b/youtube_dl/casefold.py @@ -1663,5 +1663,5 @@ def casefold(s): __all__ = [ - casefold + 
'casefold', ] From b8a86dcf1aa837577178ae25357d8241ab4ba6c1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 26 May 2023 20:25:25 +0100 Subject: [PATCH 27/88] [core] Revise 1f7c6f8 to help downstream merger (possibly) --- youtube_dl/YoutubeDL.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1b3ef94b4..98b878fc1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2374,11 +2374,10 @@ class YoutubeDL(object): self.get_encoding())) write_string(encoding_str, encoding=None) - writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) - - writeln_debug('youtube-dl version ', __version__, (' (single file build)' if ytdl_is_updateable() else '')) + self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n')) if _LAZY_LOADER: - writeln_debug('Lazy loading extractors enabled') + self._write_string('[debug] Lazy loading extractors enabled\n') + writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) # moved down for easier merge try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], From a2534f7b888416e872d5afd1862eb3e30fc69fc7 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 11 Jun 2023 13:33:50 +0100 Subject: [PATCH 28/88] [jsinterp] Fix div bug breaking player 8c7583ff Thx bashonly: https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1585639223 Fixes #32292 --- test/test_jsinterp.py | 49 ++++++++++++++++++++++++++++++++++ test/test_youtube_signature.py | 4 +++ youtube_dl/jsinterp.py | 2 +- 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 1cc148b15..ecd6ab3c9 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -33,6 +33,55 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('function x4(a){return 2*a+1;}') self.assertEqual(jsi.call_function('x4', 3), 
7) + def test_add(self): + jsi = JSInterpreter('function f(){return 42 + 7;}') + self.assertEqual(jsi.call_function('f'), 49) + jsi = JSInterpreter('function f(){return 42 + undefined;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + jsi = JSInterpreter('function f(){return 42 + null;}') + self.assertEqual(jsi.call_function('f'), 42) + + def test_sub(self): + jsi = JSInterpreter('function f(){return 42 - 7;}') + self.assertEqual(jsi.call_function('f'), 35) + jsi = JSInterpreter('function f(){return 42 - undefined;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + jsi = JSInterpreter('function f(){return 42 - null;}') + self.assertEqual(jsi.call_function('f'), 42) + + def test_mul(self): + jsi = JSInterpreter('function f(){return 42 * 7;}') + self.assertEqual(jsi.call_function('f'), 294) + jsi = JSInterpreter('function f(){return 42 * undefined;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + jsi = JSInterpreter('function f(){return 42 * null;}') + self.assertEqual(jsi.call_function('f'), 0) + + def test_div(self): + jsi = JSInterpreter('function f(a, b){return a / b;}') + self.assertTrue(math.isnan(jsi.call_function('f', 0, 0))) + self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1))) + self.assertTrue(math.isinf(jsi.call_function('f', 2, 0))) + self.assertEqual(jsi.call_function('f', 0, 3), 0) + + def test_mod(self): + jsi = JSInterpreter('function f(){return 42 % 7;}') + self.assertEqual(jsi.call_function('f'), 0) + jsi = JSInterpreter('function f(){return 42 % 0;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + jsi = JSInterpreter('function f(){return 42 % undefined;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + + def test_exp(self): + jsi = JSInterpreter('function f(){return 42 ** 2;}') + self.assertEqual(jsi.call_function('f'), 1764) + jsi = JSInterpreter('function f(){return 42 ** undefined;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + jsi = JSInterpreter('function f(){return 42 
** null;}') + self.assertEqual(jsi.call_function('f'), 1) + jsi = JSInterpreter('function f(){return undefined ** 42;}') + self.assertTrue(math.isnan(jsi.call_function('f'))) + def test_empty_return(self): jsi = JSInterpreter('function f(){return; y()}') self.assertEqual(jsi.call_function('f'), None) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index d41d708a0..e7bce9d68 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -151,6 +151,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', ), + ( + 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', + 'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A', + ), ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index dc580943e..9d4a5bc57 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -82,7 +82,7 @@ def _js_arith_op(op): def _js_div(a, b): - if JS_Undefined in (a, b) or not (a and b): + if JS_Undefined in (a, b) or not (a or b): return _NaN return operator.truediv(a or 0, b) if b else float('inf') From ff75c300f52321dc7322e28d1df153cf0ea65a6d Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:34:11 +0100 Subject: [PATCH 29/88] [jsinterp] Fix test for failed match in extract_object() --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9d4a5bc57..c18c4fef1 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -985,9 +985,9 @@ class JSInterpreter(object): \((?P<args>[^)]*)\)\s* (?P<code>{.+})''' % {'name': re.escape(funcname)}, self.code) - code, _ = self._separate_at_paren(func_m.group('code')) # refine the match if func_m is None: raise self.Exception('Could not find JS function "{funcname}"'.format(**locals())) + code, _ = self._separate_at_paren(func_m.group('code')) # refine the match return
self.build_arglist(func_m.group('args')), code def extract_function(self, funcname): From d6433cbb2c4440056a38846e35bb5a3efa9bcac2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:43:10 +0100 Subject: [PATCH 30/88] [jsinterp] Don't find unrelated objects --- youtube_dl/jsinterp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index c18c4fef1..00f219440 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -941,15 +941,15 @@ class JSInterpreter(object): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} obj_m = re.search( - r'''(?x) - (?(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) - }\s*; - ''' % (re.escape(objname), _FUNC_NAME_RE), + r'''(?xs) + (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* + (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) + }}\s*); + '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), self.code) - if not obj_m: + fields = obj_m and obj_m.group('fields') + if fields is None: raise self.Exception('Could not find object ' + objname) - fields = obj_m.group('fields') # Currently, it only supports function definitions fields_m = re.finditer( r'''(?x) From ae8ba2c31977b68b75221f80c488c0b12385269c Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 17 Jun 2023 15:36:39 +0100 Subject: [PATCH 31/88] [YouTube] Fix `KeyError QV` in signature extraction failed * temporarily force missing global definition into sig JS * improve test: thanks https://github.com/yt-dlp/yt-dlp/issues/7327#issuecomment-1595274615 * resolves #32314 --- test/test_youtube_signature.py | 7 ++++++- youtube_dl/extractor/youtube.py | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index e7bce9d68..4ba586e53 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -63,6 +63,11 @@ _SIG_TESTS = [ 
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', + ), + ( + 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ) ] @@ -231,7 +236,7 @@ def n_sig(jscode, sig_input): make_sig_test = t_factory( - 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) + 'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) for test_spec in _SIG_TESTS: make_sig_test(*test_spec) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0411c49f1..0bbce71a3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1569,8 +1569,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') - jsi = JSInterpreter(jscode) + # temporary (please) hack for player 6ed0d907 #32314 + ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};' + jsi = JSInterpreter(ah + jscode) + initial_function = jsi.extract_function(funcname) + return lambda s: initial_function([s]) def _decrypt_signature(self, s, video_id, player_url): From 07af47960f3bb262ead02490ce65c8c45c01741e Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 18 Jun 2023 00:52:18 +0100 Subject: [PATCH 32/88] [YouTube] Improve fix for ae8ba2c Thx: https://github.com/yt-dlp/yt-dlp/commit/01aba25 --- 
youtube_dl/extractor/youtube.py | 4 +--- youtube_dl/jsinterp.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bbce71a3..1855fca7f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1569,9 +1569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') - # temporary (please) hack for player 6ed0d907 #32314 - ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};' - jsi = JSInterpreter(ah + jscode) + jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 00f219440..1ba9c3d67 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -940,15 +940,18 @@ class JSInterpreter(object): def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} - obj_m = re.search( - r'''(?xs) - (?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* - (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) - }}\s*); - '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), - self.code) - fields = obj_m and obj_m.group('fields') - if fields is None: + fields = None + for obj_m in re.finditer( + r'''(?xs) + {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* + (?P({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) + }}\s*; + '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), + self.code): + fields = obj_m.group('fields') + if fields: + break + else: raise self.Exception('Could not find object ' + objname) # Currently, it only supports function definitions fields_m = re.finditer( From 9112e668a5ea6376017718db9ff13b369d53ad7a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 22 Jun 2023 13:23:31 
+0530 Subject: [PATCH 33/88] [YouTube] Improve nsig function name extraction Fixes player b7910ca8, using `,` vs `;` See https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1602231170 Co-authored-by: dirkf --- test/test_youtube_signature.py | 11 +++-------- youtube_dl/extractor/youtube.py | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4ba586e53..5dcabaf95 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -63,11 +63,6 @@ _SIG_TESTS = [ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', - ), - ( - 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', - '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', - 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ) ] @@ -157,8 +152,8 @@ _NSIG_TESTS = [ 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', ), ( - 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', - 'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A', + 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', + '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', ), ] @@ -236,7 +231,7 @@ def n_sig(jscode, sig_input): make_sig_test = t_factory( - 'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) + 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) for test_spec in _SIG_TESTS: make_sig_test(*test_spec) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1855fca7f..24e2efbd9 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -1623,15 +1623,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx') if not idx: return nfunc + + VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P\[(?P%s)\])[;,]' + note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx) + + def search_function_code(needle, group): + return self._search_regex( + VAR_RE_TMPL % (re.escape(nfunc), needle), jscode, + note.format(group), group=group) + if int_or_none(idx) == 0: - real_nfunc = self._search_regex( - r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode, - 'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals())) + real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias') if real_nfunc: return real_nfunc - return self._parse_json(self._search_regex( - r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode, - 'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)] + return self._parse_json( + search_function_code('.+?', group='name'), + nfunc, transform_source=js_to_json)[int(idx)] def _extract_n_function(self, video_id, player_url): player_id = self._extract_player_info(player_url) From ebdc82c58684b4e202fabc046f9a40fc73cccde5 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 22 Jun 2023 17:24:48 +0100 Subject: [PATCH 34/88] [workflows/ci.yml] Replace actions/setup-python for legacy Pythons Thanks MatteoH2O1999: https://github.com/MatteoH2O1999/setup-python --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 51abdce1d..9be4eaa89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,10 +38,12 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up supported Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - if: ${{ matrix.python-impl == 'cpython' && ! 
contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }} + # wrap broken actions/setup-python@v4 + uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} + cache-build: true + allow-build: info - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} uses: actions/setup-java@v2 From fa7f0effbe4e14fcf70e1dc4496371c9862b64b9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 22 Jun 2023 23:10:04 +0100 Subject: [PATCH 35/88] [YouTube] Avoid crash in author extraction --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 24e2efbd9..9c419c002 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -448,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): extract_attributes(self._search_regex( r'''(?s)(]+\bitemprop\s*=\s*("|')%s\2[^>]*>)''' % re.escape(var_name), - get_element_by_attribute('itemprop', 'author', webpage) or '', + get_element_by_attribute('itemprop', 'author', webpage or '') or '', 'author link', default='')), paths[var_name][0]) From 58fc5bde47215d9e7c60647dd21202a254b3b066 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 23 Jun 2023 00:15:06 +0100 Subject: [PATCH 36/88] [workflows/ci.yml] Restore test support for Py 3.3, 3.4, and add 2.6 --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9be4eaa89..4008cc190 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,9 +8,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-20.04] - # TODO: python 2.6 - # TODO: restore support for 3.3, 3.4 - python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] python-impl: [cpython] ytdl-test-set: [core, download] run-tests-ext: [sh] From 
2500300c2a5986ace34390aa473a8bd51f83622c Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 29 Jun 2023 15:27:12 +0100 Subject: [PATCH 37/88] [workflows/ci.yml] Restore test support for Py 3.2 --- .github/workflows/ci.yml | 319 +++++++++++++++++++++++++++-- devscripts/make_lazy_extractors.py | 4 + test/test_execution.py | 8 +- test/test_unicode_literals.py | 1 + youtube_dl/__init__.py | 8 +- youtube_dl/compat.py | 18 +- 6 files changed, 328 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4008cc190..8d8e654fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,73 +1,349 @@ name: CI -on: [push, pull_request] + +env: + # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099) + # or switching to fork of https://github.com/mdmintz/pynose + all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9 + main-cpython-versions: 2.7, 3.2, 3.5, 3.9 + pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 + cpython-versions: all + # test-set: both + test-set: core + +on: + push: + pull_request: + workflow_dispatch: + inputs: + cpython-versions: + type: choice + description: CPython versions (main = 2.7, 3.2, 3.5, 3.9) + options: + - all + - main + required: true + default: main + test-set: + type: choice + description: core, download + options: + - both + - core + - download + required: true + default: core + +permissions: + contents: read + jobs: + select: + name: Select tests from inputs + runs-on: ubuntu-latest + outputs: + cpython-versions: ${{ steps.run.outputs.cpython-versions }} + test-set: ${{ steps.run.outputs.test-set }} + own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} + steps: + - id: run + run: | + # Make a JSON Array from comma/space-separated string (no extra escaping) + json_list() { \ + ret=""; IFS="${IFS},"; set -- $*; \ + for a in "$@"; do \ + ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \ + done; \ + printf '[%s]' "$ret"; } + tests="${{ 
inputs.test-set || env.test-set }}" + [ $tests = both ] && tests="core download" + printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT" + versions="${{ inputs.cpython-versions || env.cpython-versions }}" + if [ "$versions" = all ]; then \ + versions="${{ env.all-cpython-versions }}"; else \ + versions="${{ env.main-cpython-versions }}"; \ + fi + printf 'cpython-versions=%s\n' \ + "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT" + # versions with a special get-pip.py in a per-version subdirectory + printf 'own-pip-versions=%s\n' \ + "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" + tests: - name: Tests + name: Run tests + needs: select + permissions: + contents: read + packages: write runs-on: ${{ matrix.os }} strategy: fail-fast: true matrix: os: [ubuntu-20.04] - python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + # outside steps, use github.env...., not env.... + python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-impl: [cpython] - ytdl-test-set: [core, download] + ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} run-tests-ext: [sh] include: - # python 3.2 is only available on windows via setup-python - os: windows-2019 python-version: 3.2 python-impl: cpython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: bat - os: windows-2019 python-version: 3.2 python-impl: cpython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: bat # jython - os: ubuntu-20.04 python-impl: jython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: sh - os: ubuntu-20.04 python-impl: jython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 
'download' || 'nodownload' }} run-tests-ext: sh steps: - - uses: actions/checkout@v3 + - name: Checkout + uses: actions/checkout@v3 + #-------- Python 3 ----- - name: Set up supported Python ${{ matrix.python-version }} + id: setup-python + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}} # wrap broken actions/setup-python@v4 uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} cache-build: true allow-build: info + - name: Locate supported Python ${{ matrix.python-version }} + if: ${{ env.pythonLocation }} + shell: bash + run: | + echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV" + export expected="${{ steps.setup-python.outputs.python-path }}" + dirname() { printf '%s\n' \ + 'import os, sys' \ + 'print(os.path.dirname(sys.argv[1]))' \ + | ${expected} - "$1"; } + expd="$(dirname "$expected")" + export python="$(command -v python)" + [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV" + [ -x "$python" ] || printf '%s\n' \ + 'import os' \ + 'exp = os.environ["expected"]' \ + 'python = os.environ["python"]' \ + 'exps = os.path.split(exp)' \ + 'if python and (os.path.dirname(python) == exp[0]):' \ + ' exit(0)' \ + 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \ + 'python = os.path.join(*exps)' \ + 'try:' \ + ' os.symlink(exp, python)' \ + 'except AttributeError:' \ + ' os.rename(exp, python)' \ + | ${expected} - + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | ${expected} - + #-------- Python 2.7 -- + - name: Set up Python 2.7 + if: ${{ matrix.python-version == '2.7' }} + # install 2.7 + run: | + sudo apt-get install -y python2 python-is-python2 + echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" + #-------- Python 2.6 -- + - name: Set up Python 2.6 environment + if: ${{ matrix.python-version == '2.6' }} + run: | + openssl_name=openssl-1.0.2u + echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" + 
openssl_dir=$HOME/.local/opt/$openssl_name + echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV" + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + sudo apt-get install -y openssl ca-certificates + - name: Cache Python 2.6 + id: cache26 + if: ${{ matrix.python-version == '2.6' }} + uses: actions/cache@v3 + with: + key: python-2.6.9 + path: | + ${{ env.openssl_dir }} + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 2.6 + if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} + # dl and build locally + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download and install OpenSSL 1.0.2, back in time + openssl_name=${{ env.openssl_name }} + openssl_targz=${openssl_name}.tar.gz + openssl_dir=${{ env.openssl_dir }} + openssl_inc=$openssl_dir/include + openssl_lib=$openssl_dir/lib + openssl_ssl=$openssl_dir/ssl + curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz + tar -xf $openssl_targz + ( cd $openssl_name; \ + ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \ + --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \ + make && \ + make install ) + rm -rf $openssl_name + rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs + + # Download PyEnv from its GitHub repository. 
+ export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT + eval "$(pyenv init --path)" + + # Prevent pyenv build trying (and failing) to update pip + export GET_PIP=get-pip-2.6.py + echo 'import sys; sys.exit(0)' > ${GET_PIP} + GET_PIP=$(realpath $GET_PIP) + + # Build and install Python + export CFLAGS="-I$openssl_inc" + export LDFLAGS="-L$openssl_lib" + export LD_LIBRARY_PATH="$openssl_lib" + pyenv install 2.6.9 + echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" + echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + - name: Set up cached Python 2.6 + if: ${{ steps.cache26.outputs.cache-hit }} + run: | + export PYENV_ROOT + export PATH=$PYENV_ROOT/bin:$PATH + eval "$(pyenv init --path)" + pyenv local 2.6.9 + echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" + echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} uses: actions/setup-java@v2 with: java-version: 8 distribution: 'zulu' - - name: Install Jython + - name: Setup Jython environment if: ${{ matrix.python-impl == 'jython' }} run: | - wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar - java -jar jython-installer.jar -s -d "$HOME/jython" - echo "$HOME/jython/bin" >> $GITHUB_PATH - - name: Install nose - if: ${{ matrix.python-impl != 'jython' }} - run: pip install nose + echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + - name: Cache Jython + id: cachejy + if: ${{ matrix.python-impl == 'jython' }} + uses: actions/cache@v3 + with: + # 2.7.3 now available, may solve SNI issue + key: jython-2.7.1 + path: | + ${{ env.JYTHON_ROOT }} + - name: Install Jython + if: ${{ matrix.python-impl == 'jython' && ! 
steps.cachejy.outputs.cache-hit }} + run: | + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar + java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + - name: Set up cached Jython + if: ${{ steps.cachejy.outputs.cache-hit }} + run: | + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + #-------- pip --------- + - name: Set up supported Python ${{ matrix.python-version }} pip + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + # This step may run in either Linux or Windows + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # curl is available on both Windows and Linux, -L follows redirects, -O gets name + python -m ensurepip || python -m pip --version || { \ + get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ + curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ + python get-pip.py; } + - name: Set up other Python ${{ matrix.python-version }} pip + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl + # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ + python -v get-pip.py --no-setuptools --no-wheel 
pip-7.1.2-py2.py3-none-any.whl; } + + #-------- nose -------- + - name: Install nose for Python ${{ matrix.python-version }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + python --version + python -m pip --version + python -m pip nose --version || python -m pip install nose + - name: Install nose for other Python ${{ matrix.python-version }} + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + python -m pip nose --version || { \ + curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ + python --version; \ + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | python -; \ + python -m pip --version; \ + python -m pip install nose-1.3.7-py3-none-any.whl; } - name: Install nose (Jython) if: ${{ matrix.python-impl == 'jython' }} - # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) run: | - wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl - pip install nose-1.3.7-py2-none-any.whl + pip nose --version || { \ + curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + pip --version; \ + pip install nose-1.3.7-py2-none-any.whl; } + - name: Set up nosetest test + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} + shell: bash + run: | + # define a test to validate the Python version used by nosetests + printf '%s\n' \ + 'from __future__ import 
unicode_literals' \ + 'import sys, os, platform, unittest' \ + 'class TestPython(unittest.TestCase):' \ + ' def setUp(self):' \ + ' self.ver = os.environ["PYTHON_VER"].split("-")' \ + ' def test_python_ver(self):' \ + ' self.assertEqual(sys.version[:3], self.ver[-1])' \ + ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ + ' self.assertIn(self.ver[0], sys.version.lower())' \ + ' def test_python_impl(self):' \ + ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ + > test/test_python.py + #-------- TESTS ------- - name: Run tests + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} env: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} + PYTHON_VER: ${{ matrix.python-version }} + PYTHON_IMPL: ${{ matrix.python-impl }} + + run: | + ./devscripts/run_tests.${{ matrix.run-tests-ext }} + flake8: name: Linter runs-on: ubuntu-latest @@ -81,3 +357,4 @@ jobs: run: pip install flake8 - name: Run flake8 run: flake8 . 
+ diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index edc19183d..4bddca047 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -6,6 +6,10 @@ import os from os.path import dirname as dirn import sys +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() + print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) diff --git a/test/test_execution.py b/test/test_execution.py index 704e14612..1dee53a0f 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -10,10 +10,13 @@ import os import subprocess sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from youtube_dl.compat import compat_register_utf8 + from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +compat_register_utf8() try: _DEV_NULL = subprocess.DEVNULL @@ -25,13 +28,14 @@ class TestExecution(unittest.TestCase): def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) + @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): p = subprocess.Popen( [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], diff --git 
a/test/test_unicode_literals.py b/test/test_unicode_literals.py index 6c1b7ec91..c7c2252f5 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -15,6 +15,7 @@ IGNORED_FILES = [ 'setup.py', # http://bugs.python.org/issue13943 'conf.py', 'buildserver.py', + 'get-pip.py', ] IGNORED_DIRS = [ diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e1bd67919..cc8285eba 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals __license__ = 'Public Domain' -import codecs import io import os import random @@ -17,6 +16,7 @@ from .options import ( ) from .compat import ( compat_getpass, + compat_register_utf8, compat_shlex_split, workaround_optparse_bug9161, ) @@ -46,10 +46,8 @@ from .YoutubeDL import YoutubeDL def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) + # Compatibility fix for Windows + compat_register_utf8() workaround_optparse_bug9161() diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index fe62caf80..0f4d3756f 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -31,13 +31,17 @@ try: compat_str, compat_basestring, compat_chr = ( unicode, basestring, unichr ) - from .casefold import casefold as compat_casefold - except NameError: compat_str, compat_basestring, compat_chr = ( str, str, chr ) + +# casefold +try: + compat_str.casefold compat_casefold = lambda s: s.casefold() +except AttributeError: + from .casefold import casefold as compat_casefold try: import collections.abc as compat_collections_abc @@ -3137,6 +3141,15 @@ else: compat_open = open +# compat_register_utf8 +def compat_register_utf8(): + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + from codecs import register, lookup + register( + lambda name: lookup('utf-8') if 
name == 'cp65001' else None) + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3203,6 +3216,7 @@ __all__ = [ 'compat_print', 'compat_re_Match', 'compat_re_Pattern', + 'compat_register_utf8', 'compat_setenv', 'compat_shlex_quote', 'compat_shlex_split', From b08a58090635777f1001d5cde2cd141a5565177c Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 30 Jun 2023 03:52:39 +0100 Subject: [PATCH 38/88] [workflows/ci.yml] Fix test support for Py 2.6 --- .github/workflows/ci.yml | 115 ++++++++++++++++++----------- devscripts/make_lazy_extractors.py | 8 +- test/test_execution.py | 16 ++-- 3 files changed, 83 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d8e654fb..ce878c1b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,9 +6,8 @@ env: all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9 main-cpython-versions: 2.7, 3.2, 3.5, 3.9 pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 - cpython-versions: all - # test-set: both - test-set: core + cpython-versions: main + test-set: both on: push: @@ -75,6 +74,10 @@ jobs: contents: read packages: write runs-on: ${{ matrix.os }} + env: + PIP: python -m pip + PIP_DISABLE_PIP_VERSION_CHECK: true + PIP_NO_PYTHON_VERSION_WARNING: true strategy: fail-fast: true matrix: @@ -152,12 +155,14 @@ jobs: - name: Set up Python 2.7 if: ${{ matrix.python-version == '2.7' }} # install 2.7 + shell: bash run: | sudo apt-get install -y python2 python-is-python2 echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" #-------- Python 2.6 -- - name: Set up Python 2.6 environment if: ${{ matrix.python-version == '2.6' }} + shell: bash run: | openssl_name=openssl-1.0.2u echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" @@ -178,6 +183,7 @@ jobs: - name: Build and set up Python 2.6 if: ${{ matrix.python-version == '2.6' && ! 
steps.cache26.outputs.cache-hit }} # dl and build locally + shell: bash run: | # Install build environment sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ @@ -203,8 +209,7 @@ jobs: # Download PyEnv from its GitHub repository. export PYENV_ROOT=${{ env.PYENV_ROOT }} export PATH=$PYENV_ROOT/bin:$PATH - git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT - eval "$(pyenv init --path)" + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" # Prevent pyenv build trying (and failing) to update pip export GET_PIP=get-pip-2.6.py @@ -216,17 +221,14 @@ jobs: export LDFLAGS="-L$openssl_lib" export LD_LIBRARY_PATH="$openssl_lib" pyenv install 2.6.9 - echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" - echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" - - name: Set up cached Python 2.6 - if: ${{ steps.cache26.outputs.cache-hit }} + - name: Locate Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash run: | - export PYENV_ROOT - export PATH=$PYENV_ROOT/bin:$PATH - eval "$(pyenv init --path)" - pyenv local 2.6.9 - echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" - echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} @@ -236,8 +238,10 @@ jobs: distribution: 'zulu' - name: Setup Jython environment if: ${{ matrix.python-impl == 'jython' }} + shell: bash run: | echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + echo "PIP=pip" >> "$GITHUB_ENV" - name: Cache Jython id: cachejy if: ${{ matrix.python-impl == 'jython' }} @@ -249,19 +253,21 @@ jobs: ${{ env.JYTHON_ROOT }} - name: Install Jython if: ${{ matrix.python-impl == 'jython' && ! 
steps.cachejy.outputs.cache-hit }} + shell: bash run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" - echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH" - name: Set up cached Jython if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH #-------- pip --------- - name: Set up supported Python ${{ matrix.python-version }} pip - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} # This step may run in either Linux or Windows shell: bash run: | @@ -272,48 +278,66 @@ jobs: get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ python get-pip.py; } + - name: Set up Python 2.6 pip + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; } + # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751 + echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV" - name: Set up other Python ${{ matrix.python-version }} pip if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} shell: bash run: | - # 
https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl - # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl python -m pip --version || { \ curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ - python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } - + python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } + #-------- unittest ---- + - name: Upgrade Unittest for Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + # see pip for Jython + $PIP -qq show unittest2 || { \ + for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \ + "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \ + "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \ + "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \ + "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \ + curl -L -O "https://files.pythonhosted.org/packages/${u}"; \ + $PIP install ${u##*/}; \ + done; } + # make tests use unittest2 + for test in ./test/test_*.py; do + sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test" + done #-------- nose -------- - name: Install nose for Python ${{ matrix.python-version }} - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + if: ${{ (matrix.python-version != '3.2' && 
steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} shell: bash run: | echo "$PATH" echo "$PYTHONHOME" - python --version - python -m pip --version - python -m pip nose --version || python -m pip install nose - - name: Install nose for other Python ${{ matrix.python-version }} + $PIP -qq show nose || $PIP install nose + - name: Install nose for other Python 2 + if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show nose || { \ + curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + $PIP install nose-1.3.7-py2-none-any.whl; } + - name: Install nose for other Python 3 if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} shell: bash run: | - python -m pip nose --version || { \ + $PIP -qq show nose || { \ curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ - python --version; \ - printf '%s\n' \ - 'import sys' \ - 'print(sys.path)' \ - | python -; \ - python -m pip --version; \ - python -m pip install nose-1.3.7-py3-none-any.whl; } - - name: Install nose (Jython) - if: ${{ matrix.python-impl == 'jython' }} - # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) - run: | - pip nose --version || { \ - curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ - pip --version; \ - pip install nose-1.3.7-py2-none-any.whl; } + $PIP install nose-1.3.7-py3-none-any.whl; } - name: Set up nosetest test if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} shell: bash 
@@ -321,7 +345,11 @@ jobs: # define a test to validate the Python version used by nosetests printf '%s\n' \ 'from __future__ import unicode_literals' \ - 'import sys, os, platform, unittest' \ + 'import sys, os, platform' \ + 'try:' \ + ' import unittest2 as unittest' \ + 'except ImportError:' \ + ' import unittest' \ 'class TestPython(unittest.TestCase):' \ ' def setUp(self):' \ ' self.ver = os.environ["PYTHON_VER"].split("-")' \ @@ -340,7 +368,6 @@ jobs: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} PYTHON_VER: ${{ matrix.python-version }} PYTHON_IMPL: ${{ matrix.python-impl }} - run: | ./devscripts/run_tests.${{ matrix.run-tests-ext }} diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 4bddca047..a8b6ff1b9 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -6,10 +6,6 @@ import os from os.path import dirname as dirn import sys -from youtube_dl.compat import compat_register_utf8 - -compat_register_utf8() - print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) @@ -23,6 +19,10 @@ try: except OSError: pass +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() + from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor diff --git a/test/test_execution.py b/test/test_execution.py index 1dee53a0f..35e7a5651 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -11,13 +11,12 @@ import subprocess sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import compat_register_utf8 - from youtube_dl.utils import encodeArgument -rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - compat_register_utf8() +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + try: _DEV_NULL = subprocess.DEVNULL except 
AttributeError: @@ -33,21 +32,22 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): + os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) def test_lazy_extractors(self): - lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py' + lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py') try: - subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) - subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL) finally: for x in ['', 'c'] if sys.version_info[0] < 3 else ['']: try: From f24bc9272e9b74efc4c4af87c862f5f78921d424 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 4 Jul 2023 16:06:21 +0100 Subject: [PATCH 39/88] [Misc] Fixes for 2.6 compatibility --- test/test_jsinterp.py | 10 ++++++---- test/test_utils.py | 2 +- youtube_dl/YoutubeDL.py | 6 +++++- youtube_dl/compat.py | 12 ++++++++++++ 
youtube_dl/jsinterp.py | 13 ++++++++++++- youtube_dl/utils.py | 3 ++- 6 files changed, 38 insertions(+), 8 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index ecd6ab3c9..91b12f544 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -492,10 +492,12 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; } ''') - attrs = set(('findall', 'finditer', 'flags', 'groupindex', - 'groups', 'match', 'pattern', 'scanner', - 'search', 'split', 'sub', 'subn')) - self.assertTrue(set(dir(jsi.call_function('x'))) > attrs) + attrs = set(('findall', 'finditer', 'match', 'scanner', 'search', + 'split', 'sub', 'subn')) + if sys.version_info >= (2, 7): + # documented for 2.6 but may not be found + attrs.update(('flags', 'groupindex', 'groups', 'pattern')) + self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs) jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/i; return a; } diff --git a/test/test_utils.py b/test/test_utils.py index b85d397d0..5fab05f7c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1612,7 +1612,7 @@ Line 1 self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), [_TEST_DATA['urls']], msg='function as query key should perform a filter based on (key, value)') - self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'}, + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',), msg='exceptions in the query function should be caught') # Test alternative paths diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 98b878fc1..068029d3e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -25,7 +25,11 @@ import tokenize import traceback import random -from ssl import OPENSSL_VERSION +try: + from ssl import OPENSSL_VERSION +except ImportError: + # Must be Python 
2.6, should be built against 1.0.2 + OPENSSL_VERSION = 'OpenSSL 1.0.2(?)' from string import ascii_letters from .compat import ( diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0f4d3756f..2554fd1c3 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,10 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals +from __future__ import division import base64 import binascii import collections import ctypes +import datetime import email import getpass import io @@ -3150,6 +3152,15 @@ def compat_register_utf8(): lambda name: lookup('utf-8') if name == 'cp65001' else None) +# compat_datetime_timedelta_total_seconds +try: + compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds +except AttributeError: + # Py 2.6 + def compat_datetime_timedelta_total_seconds(td): + return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3187,6 +3198,7 @@ __all__ = [ 'compat_chr', 'compat_collections_abc', 'compat_collections_chain_map', + 'compat_datetime_timedelta_total_seconds', 'compat_http_cookiejar', 'compat_http_cookiejar_Cookie', 'compat_http_cookies', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 1ba9c3d67..882432b80 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -277,9 +277,20 @@ class JSInterpreter(object): def __getattr__(self, name): self.__instantiate() + # make Py 2.6 conform to its lying documentation + if name == 'flags': + self.flags = self.__flags + elif name == 'pattern': + self.pattern = self.__pattern_txt + elif name in ('groupindex', 'groups'): + # in case these get set after a match? 
+ if hasattr(self.__self, name): + setattr(self, name, getattr(self.__self, name)) + else: + return 0 if name == 'groupindex' else {} if hasattr(self, name): return getattr(self, name) - return super(JSInterpreter.JS_RegExp, self).__getattr__(name) + raise AttributeError('{0} has no attribute named {1}'.format(self, name)) @classmethod def regex_flags(cls, expr): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 584581b6a..83f67bd95 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_collections_abc, compat_cookiejar, compat_ctypes_WINFUNCTYPE, + compat_datetime_timedelta_total_seconds, compat_etree_fromstring, compat_expanduser, compat_html_entities, @@ -3102,7 +3103,7 @@ def unified_timestamp(date_str, day_first=True): pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() + return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) def determine_ext(url, default_ext='unknown_video'): From b6dff4073d469cceadb099c00ccbf3bd6fc515a6 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:41:32 +0100 Subject: [PATCH 40/88] [core] Revert version display from b8a86dc --- youtube_dl/YoutubeDL.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 068029d3e..4e7fd1063 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2378,10 +2378,12 @@ class YoutubeDL(object): self.get_encoding())) write_string(encoding_str, encoding=None) - self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n')) + writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) + writeln_debug('youtube-dl version ', __version__) if _LAZY_LOADER: - self._write_string('[debug] Lazy loading extractors enabled\n') - 
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) # moved down for easier merge + writeln_debug('Lazy loading extractors enabled') + if ytdl_is_updateable(): + writeln_debug('Single file build') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], From f47fdb9564d3ca1c0fa70ed6031148ec908fdc7b Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Jul 2023 15:46:22 +0100 Subject: [PATCH 41/88] [utils] Add {expected_type} and Iterable support to traverse_obj() --- test/test_utils.py | 153 ++++++++++++++++++++++++++------ youtube_dl/utils.py | 211 +++++++++++++++++++++++++++++--------------- 2 files changed, 265 insertions(+), 99 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5fab05f7c..1fc16ed05 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -79,10 +79,12 @@ from youtube_dl.utils import ( rot47, shell_quote, smuggle_url, + str_or_none, str_to_int, strip_jsonp, strip_or_none, subtitles_filename, + T, timeconvert, traverse_obj, try_call, @@ -1566,6 +1568,7 @@ Line 1 self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') def test_traverse_obj(self): + str = compat_str _TEST_DATA = { 100: 100, 1.2: 1.2, @@ -1598,8 +1601,8 @@ Line 1 # Test Ellipsis behavior self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), - (item for item in _TEST_DATA.values() if item is not None), - msg='`...` should give all values except `None`') + (item for item in _TEST_DATA.values() if item not in (None, {})), + msg='`...` should give all non discarded values') self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), msg='`...` selection for dicts should select all values') self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), @@ -1607,13 +1610,51 @@ Line 1 msg='nested `...` queries should work') self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4), msg='`...` query result should be flattened') + 
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)), + msg='`...` should accept iterables') # Test function as key self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), [_TEST_DATA['urls']], msg='function as query key should perform a filter based on (key, value)') - self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',), - msg='exceptions in the query function should be caught') + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'}, + msg='exceptions in the query function should be catched') + self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2], + msg='function key should accept iterables') + if __debug__: + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a: Ellipsis) + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis) + + # Test set as key (transformation/type, like `expected_type`) + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'], + msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'], + msg='Type in set should be a type filter') + self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA, + msg='A single set should be wrapped into a path') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'], + msg='Transformation function should not raise') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') + if __debug__: + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, set()) + with 
self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, {str.upper, str}) + + # Test `slice` as a key + _SLICE_DATA = [0, 1, 2, 3, 4] + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None, + msg='slice on a dictionary should not throw') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2], + msg='slice key should apply slice to sequence') # Test alternative paths self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', @@ -1659,15 +1700,23 @@ Line 1 {0: ['https://www.example.com/1', 'https://www.example.com/0']}, msg='triple nesting in dict path should be treated as branches') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, - msg='remove `None` values when dict key') + msg='remove `None` values when top level dict key fails') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, - msg='do not remove `None` values if `default`') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}}, - msg='do not remove empty values when dict key') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}}, - msg='do not remove empty values when dict key and a default') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []}, - msg='if branch in dict key not successful, return `[]`') + msg='use `default` if key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {}, + msg='remove empty values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis}, + msg='use `default` when dict key and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 
'fail'}}), {}, + msg='remove empty values when nested dict key fails') + self.assertEqual(traverse_obj(None, {0: 'fail'}), {}, + msg='default to dict if pruned') + self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, + msg='default to dict if pruned and default is given') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}}, + msg='use nested `default` when nested dict key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {}, + msg='remove key if branch in dict key not successful') # Testing default parameter behavior _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} @@ -1691,20 +1740,55 @@ Line 1 msg='if branched but not successful return `[]`, not `default`') self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [], msg='if branched but object is empty return `[]`, not `default`') + self.assertEqual(traverse_obj(None, Ellipsis), [], + msg='if branched but object is `None` return `[]`, not `default`') + self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [], + msg='if branched but state is `None` return `[]`, not `default`') + + branching_paths = [ + ('fail', Ellipsis), + (Ellipsis, 'fail'), + 100 * ('fail',) + (Ellipsis,), + (Ellipsis,) + 100 * ('fail',), + ] + for branching_path in branching_paths: + self.assertEqual(traverse_obj({}, branching_path), [], + msg='if branched but state is `None`, return `[]` (not `default`)') + self.assertEqual(traverse_obj({}, 'fail', branching_path), [], + msg='if branching in last alternative and previous did not match, return `[]` (not `default`)') + self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x', + msg='if branching in last alternative and previous did match, return single value') + self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x', + msg='if branching in first alternative and non-branching path does match, return single value') + 
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, + msg='if branching in first alternative and non-branching path does not match, return `default`') # Testing expected_type behavior _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str', - msg='accept matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None, - msg='reject non matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0', - msg='transform type using type function') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', - expected_type=lambda _: 1 / 0), None, - msg='wrap expected_type function in try_call') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'], - msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), + 'str', msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), + None, msg='reject non matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), + '0', msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), + None, msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str), + ['str'], msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), + {0: 100}, msg='type as expected_type should filter dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), + {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict 
values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int), + 1, msg='expected_type should not filter non final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)), + [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int), + [], msg='expected_type regression for type matching in dict result') # Test get_all behavior _GET_ALL_DATA = {'key': [0, 1, 2]} @@ -1749,14 +1833,23 @@ Line 1 _traverse_string=True), '.', msg='traverse into converted data if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), - _traverse_string=True), list('str'), - msg='`...` branching into string should result in list') + _traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + _traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), + _traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), _traverse_string=True), ['s', 'r'], - msg='branching into string should result in list') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x), - _traverse_string=True), list('str'), - msg='function branching into string should result in list') + 
msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') # Test is_user_input behavior _IS_USER_INPUT_DATA = {'range8': list(range(8))} @@ -1793,6 +1886,8 @@ Line 1 msg='failing str key on a `re.Match` should return `default`') self.assertEqual(traverse_obj(mobj, 8), None, msg='failing int key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], + msg='function on a `re.Match` should give group name as well') def test_get_first(self): self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 83f67bd95..dbdbe5f59 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -16,6 +16,7 @@ import email.header import errno import functools import gzip +import inspect import io import itertools import json @@ -3881,7 +3882,7 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized -class LazyList(compat_collections_abc.Sequence): +class LazyList(compat_collections_abc.Iterable): """Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList""" @@ -4223,10 +4224,16 @@ def multipart_encode(data, boundary=None): return out, content_type -def variadic(x, allowed_types=(compat_str, bytes, dict)): - if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable): +def is_iterable_like(x, allowed_types=compat_collections_abc.Iterable, blocked_types=NO_DEFAULT): + if blocked_types is 
NO_DEFAULT: + blocked_types = (compat_str, bytes, compat_collections_abc.Mapping) + return isinstance(x, allowed_types) and not isinstance(x, blocked_types) + + +def variadic(x, allowed_types=NO_DEFAULT): + if isinstance(allowed_types, compat_collections_abc.Iterable): allowed_types = tuple(allowed_types) - return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) + return x if is_iterable_like(x, blocked_types=allowed_types) else (x,) def dict_get(d, key_or_keys, default=None, skip_false_values=True): @@ -5993,7 +6000,7 @@ def clean_podcast_url(url): def traverse_obj(obj, *paths, **kwargs): """ - Safely traverse nested `dict`s and `Sequence`s + Safely traverse nested `dict`s and `Iterable`s >>> obj = [{}, {"key": "value"}] >>> traverse_obj(obj, (1, "key")) @@ -6001,14 +6008,17 @@ def traverse_obj(obj, *paths, **kwargs): Each of the provided `paths` is tested and the first producing a valid result will be returned. The next path will also be tested if the path branched but no results could be found. - Supported values for traversal are `Mapping`, `Sequence` and `re.Match`. - A value of None is treated as the absence of a value. + Supported values for traversal are `Mapping`, `Iterable` and `re.Match`. + Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. The keys in the path can be one of: - `None`: Return the current object. - - `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`. + - `set`: Requires the only item in the set to be a type or function, + like `{type}`/`{func}`. If a `type`, returns only values + of this type. If a function, returns `func(obj)`. + - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. - `slice`: Branch out and return all values in `obj[key]`. - `Ellipsis`: Branch out and return a list of all values. 
- `tuple`/`list`: Branch out and return a list of all matching values. @@ -6016,6 +6026,9 @@ def traverse_obj(obj, *paths, **kwargs): - `function`: Branch out and return values filtered by the function. Read as: `[value for key, value in obj if function(key, value)]`. For `Sequence`s, `key` is the index of the value. + For `Iterable`s, `key` is the enumeration count of the value. + For `re.Match`es, `key` is the group number (0 = full match) + as well as additionally any group names, if given. - `dict` Transform the current object and return a matching dict. Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. @@ -6024,8 +6037,12 @@ def traverse_obj(obj, *paths, **kwargs): @params paths Paths which to traverse by. Keyword arguments: @param default Value to return if the paths do not match. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, depth first. Try to avoid if using nested `dict` keys. @param expected_type If a `type`, only accept final values of this type. If any other callable, try to call the function on each result. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, recursively. This does respect branching paths. @param get_all If `False`, return the first matching result, otherwise all matching ones. @param casesense If `False`, consider string dictionary keys as case insensitive. @@ -6036,12 +6053,15 @@ def traverse_obj(obj, *paths, **kwargs): @param _traverse_string Whether to traverse into objects as strings. If `True`, any non-compatible object will first be converted into a string and then traversed into. + The return value of that path will be a string instead, + not respecting any further branching. @returns The result of the object traversal. If successful, `get_all=True`, and the path branches at least once, then a list of results is returned instead. A list is always returned if the last path branches and no `default` is given. 
+ If a path ends on a `dict` that result will always be a `dict`. """ # parameter defaults @@ -6055,7 +6075,6 @@ def traverse_obj(obj, *paths, **kwargs): # instant compat str = compat_str - is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes)) casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k if isinstance(expected_type, type): @@ -6063,128 +6082,180 @@ def traverse_obj(obj, *paths, **kwargs): else: type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) + def lookup_or_none(v, k, getter=None): + try: + return getter(v, k) if getter else v[k] + except IndexError: + return None + def from_iterable(iterables): # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F for it in iterables: for item in it: yield item - def apply_key(key, obj): - if obj is None: - return + def apply_key(key, obj, is_last): + branching = False + + if obj is None and _traverse_string: + if key is Ellipsis or callable(key) or isinstance(key, slice): + branching = True + result = () + else: + result = None elif key is None: - yield obj + result = obj + + elif isinstance(key, set): + assert len(key) == 1, 'Set should only be used to wrap a single item' + item = next(iter(key)) + if isinstance(item, type): + result = obj if isinstance(obj, item) else None + else: + result = try_call(item, args=(obj,)) elif isinstance(key, (list, tuple)): - for branch in key: - _, result = apply_path(obj, branch) - for item in result: - yield item + branching = True + result = from_iterable( + apply_path(obj, branch, is_last)[0] for branch in key) elif key is Ellipsis: - result = [] + branching = True if isinstance(obj, compat_collections_abc.Mapping): result = obj.values() - elif is_sequence(obj): + elif is_iterable_like(obj): result = obj elif isinstance(obj, compat_re_Match): result = obj.groups() elif _traverse_string: + branching = False result = str(obj) - for item in result: - yield item + else: + result = () elif 
callable(key): - if is_sequence(obj): - iter_obj = enumerate(obj) - elif isinstance(obj, compat_collections_abc.Mapping): + branching = True + if isinstance(obj, compat_collections_abc.Mapping): iter_obj = obj.items() + elif is_iterable_like(obj): + iter_obj = enumerate(obj) elif isinstance(obj, compat_re_Match): - iter_obj = enumerate(itertools.chain([obj.group()], obj.groups())) + iter_obj = itertools.chain( + enumerate(itertools.chain((obj.group(),), obj.groups())), + obj.groupdict().items()) elif _traverse_string: + branching = False iter_obj = enumerate(str(obj)) else: - return - for item in (v for k, v in iter_obj if try_call(key, args=(k, v))): - yield item + iter_obj = () + + result = (v for k, v in iter_obj if try_call(key, args=(k, v))) + if not branching: # string traversal + result = ''.join(result) elif isinstance(key, dict): - iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items()) - yield dict((k, v if v is not None else default) for k, v in iter_obj - if v is not None or default is not NO_DEFAULT) + iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items()) + result = dict((k, v if v is not None else default) for k, v in iter_obj + if v is not None or default is not NO_DEFAULT) or None elif isinstance(obj, compat_collections_abc.Mapping): - yield (obj.get(key) if casesense or (key in obj) - else next((v for k, v in obj.items() if casefold(k) == key), None)) + result = (try_call(obj.get, args=(key,)) + if casesense or try_call(obj.__contains__, args=(key,)) + else next((v for k, v in obj.items() if casefold(k) == key), None)) elif isinstance(obj, compat_re_Match): + result = None if isinstance(key, int) or casesense: - try: - yield obj.group(key) - return - except IndexError: - pass - if not isinstance(key, str): - return + result = lookup_or_none(obj, key, getter=compat_re_Match.group) - yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) + elif isinstance(key, str): + result = next((v for k, 
v in obj.groupdict().items() + if casefold(k) == key), None) else: - if _is_user_input: - key = (int_or_none(key) if ':' not in key - else slice(*map(int_or_none, key.split(':')))) + result = None + if isinstance(key, (int, slice)): + if is_iterable_like(obj, compat_collections_abc.Sequence): + branching = isinstance(key, slice) + result = lookup_or_none(obj, key) + elif _traverse_string: + result = lookup_or_none(str(obj), key) - if not isinstance(key, (int, slice)): - return + return branching, result if branching else (result,) - if not is_sequence(obj): - if not _traverse_string: - return - obj = str(obj) + def lazy_last(iterable): + iterator = iter(iterable) + prev = next(iterator, NO_DEFAULT) + if prev is NO_DEFAULT: + return - try: - yield obj[key] - except IndexError: - pass + for item in iterator: + yield False, prev + prev = item - def apply_path(start_obj, path): + yield True, prev + + def apply_path(start_obj, path, test_type): objs = (start_obj,) has_branched = False - for key in variadic(path): - if _is_user_input and key == ':': - key = Ellipsis + key = None + for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): + if _is_user_input and isinstance(key, str): + if key == ':': + key = Ellipsis + elif ':' in key: + key = slice(*map(int_or_none, key.split(':'))) + elif int_or_none(key) is not None: + key = int(key) if not casesense and isinstance(key, str): key = compat_casefold(key) - if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key): - has_branched = True + if __debug__ and callable(key): + # Verify function signature + inspect.getcallargs(key, None, None) - key_func = functools.partial(apply_key, key) - objs = from_iterable(map(key_func, objs)) + new_objs = [] + for obj in objs: + branching, results = apply_key(key, obj, last) + has_branched |= branching + new_objs.append(results) - return has_branched, objs + objs = from_iterable(new_objs) - def _traverse_obj(obj, path, use_list=True): - has_branched, results = 
apply_path(obj, path) - results = LazyList(x for x in map(type_test, results) if x is not None) + if test_type and not isinstance(key, (dict, list, tuple)): + objs = map(type_test, objs) + + return objs, has_branched, isinstance(key, dict) + + def _traverse_obj(obj, path, allow_empty, test_type): + results, has_branched, is_dict = apply_path(obj, path, test_type) + results = LazyList(x for x in results if x not in (None, {})) if get_all and has_branched: - return results.exhaust() if results or use_list else None + if results: + return results.exhaust() + if allow_empty: + return [] if default is NO_DEFAULT else default + return None - return results[0] if results else None + return results[0] if results else {} if allow_empty and is_dict else None for index, path in enumerate(paths, 1): - use_list = default is NO_DEFAULT and index == len(paths) - result = _traverse_obj(obj, path, use_list) + result = _traverse_obj(obj, path, index == len(paths), True) if result is not None: return result return None if default is NO_DEFAULT else default +def T(x): + """ For use in yt-dl instead of {type} or set((type,)) """ + return set((x,)) + + def get_first(obj, keys, **kwargs): return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs) From d5ef405c5d533c85cebd205a5b7958614c7013f3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:45:31 +0100 Subject: [PATCH 42/88] [core] Align error reporting methods with yt-dlp --- test/helper.py | 3 ++- test/test_YoutubeDL.py | 10 ++-------- youtube_dl/YoutubeDL.py | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/test/helper.py b/test/helper.py index 883b2e877..e3314b03e 100644 --- a/test/helper.py +++ b/test/helper.py @@ -72,7 +72,8 @@ class FakeYDL(YoutubeDL): def to_screen(self, s, skip_eol=None): print(s) - def trouble(self, s, tb=None): + def trouble(self, *args, **kwargs): + s = args[0] if len(args) > 0 else kwargs.get('message', 
'Missing message') raise Exception(s) def download(self, x): diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8c8e619c..60780b8a7 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -930,17 +930,11 @@ class TestYoutubeDL(unittest.TestCase): # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): - class _YDL(YDL): - def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) - - def trouble(self, s, tb=None): - pass - - ydl = _YDL({ + ydl = YDL({ 'format': 'extra', 'ignoreerrors': True, }) + ydl.trouble = lambda *_, **__: None class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P\d+)' diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4e7fd1063..1435754c2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -582,7 +582,7 @@ class YoutubeDL(object): if self.params.get('cookiefile') is not None: self.cookiejar.save(ignore_discard=True, ignore_expires=True) - def trouble(self, message=None, tb=None): + def trouble(self, *args, **kwargs): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore @@ -591,6 +591,11 @@ class YoutubeDL(object): tb, if given, is additional traceback information. 
""" + # message=None, tb=None, is_error=True + message = args[0] if len(args) > 0 else kwargs.get('message', None) + tb = args[1] if len(args) > 1 else kwargs.get('tb', None) + is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True) + if message is not None: self.to_stderr(message) if self.params.get('verbose'): @@ -603,7 +608,10 @@ class YoutubeDL(object): else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) - self.to_stderr(tb) + if tb: + self.to_stderr(tb) + if not is_error: + return if not self.params.get('ignoreerrors', False): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info @@ -612,11 +620,18 @@ class YoutubeDL(object): raise DownloadError(message, exc_info) self._download_retcode = 1 - def report_warning(self, message): + def report_warning(self, message, only_once=False, _cache={}): ''' Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' + if only_once: + m_hash = hash((self, message)) + m_cnt = _cache.setdefault(m_hash, 0) + _cache[m_hash] = m_cnt + 1 + if m_cnt > 0: + return + if self.params.get('logger') is not None: self.params['logger'].warning(message) else: @@ -629,7 +644,7 @@ class YoutubeDL(object): warning_message = '%s %s' % (_msg_header, message) self.to_stderr(warning_message) - def report_error(self, message, tb=None): + def report_error(self, message, *args, **kwargs): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
@@ -638,8 +653,18 @@ class YoutubeDL(object): _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) + kwargs['message'] = '%s %s' % (_msg_header, message) + self.trouble(*args, **kwargs) + + def report_unscoped_cookies(self, *args, **kwargs): + # message=None, tb=False, is_error=False + if len(args) <= 2: + kwargs.setdefault('is_error', False) + if len(args) <= 0: + kwargs.setdefault( + 'message', + 'Unscoped cookies are not allowed: please specify some sort of scoping') + self.report_error(*args, **kwargs) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -835,7 +860,7 @@ class YoutubeDL(object): msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' self.report_error(msg) except ExtractorError as e: # An error we somewhat expected - self.report_error(compat_str(e), e.format_traceback()) + self.report_error(compat_str(e), tb=e.format_traceback()) except MaxDownloadsReached: raise except Exception as e: From 1720c04dc56fa0d2caa0a455b1acbd569347482e Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 17 Jul 2023 20:47:58 +0100 Subject: [PATCH 43/88] [test] Make skipped tests in test_execution work with Py 2.6 --- test/test_execution.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_execution.py b/test/test_execution.py index 35e7a5651..ae59e562a 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -24,21 +24,24 @@ except AttributeError: class TestExecution(unittest.TestCase): + def setUp(self): + self.module = 'youtube_dl' + if sys.version_info < (2, 7): + self.module += '.__main__' + def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) - @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_module_exec(self): - 
subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) - @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'], + [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) From 648dc5304cb2476592ff142988b8c62675011fcc Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:51:38 +0100 Subject: [PATCH 44/88] [compat] Add Request and HTTPClient compat for redirect * support `method` parameter of `Request.__init__` (Py 2 and old Py 3) * support `getcode` method of compat_http_client.HTTPResponse (Py 2) --- youtube_dl/compat.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 2554fd1c3..cd11ba5aa 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,7 @@ import socket import struct import subprocess import sys +import types import xml.etree.ElementTree # naming convention @@ -55,6 +56,22 @@ try: except ImportError: # Python 2 import urllib2 as compat_urllib_request +# Also fix up lack of method arg in old Pythons +try: + _req = compat_urllib_request.Request + _req('http://127.0.0.1', method='GET') +except TypeError: + class _request(object): + def __new__(cls, url, *args, **kwargs): + method = kwargs.pop('method', None) + r = _req(url, *args, **kwargs) + if method: + r.get_method = types.MethodType(lambda _: method, 
r) + return r + + compat_urllib_request.Request = _request + + try: import urllib.error as compat_urllib_error except ImportError: # Python 2 @@ -79,6 +96,12 @@ try: except ImportError: # Python 2 import urllib as compat_urllib_response +try: + compat_urllib_response.addinfourl.status +except AttributeError: + # .getcode() is deprecated in Py 3. + compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) + try: import http.cookiejar as compat_cookiejar except ImportError: # Python 2 @@ -2360,6 +2383,11 @@ try: import http.client as compat_http_client except ImportError: # Python 2 import httplib as compat_http_client +try: + compat_http_client.HTTPResponse.getcode +except AttributeError: + # Py < 3.1 + compat_http_client.HTTPResponse.getcode = lambda self: self.status try: from urllib.error import HTTPError as compat_HTTPError From 46fde7caeeab13a6277aab22a0e8a29e10c30cc3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 7 Jun 2023 14:51:50 +0100 Subject: [PATCH 45/88] [core] Update redirect handling from yt-dlp * Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094 * add test that redirected `POST` loses its `Content-Type` --- test/test_http.py | 489 +++++++++++++++++++++++++++++++++++++++----- youtube_dl/utils.py | 74 ++++--- 2 files changed, 484 insertions(+), 79 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index 487a9bc77..1a65df9e0 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,33 +8,160 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import gzip +import io +import ssl +import tempfile +import threading +import zlib + +# avoid deprecated alias assertRaisesRegexp +if hasattr(unittest.TestCase, 'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex + +try: + import brotli +except ImportError: + brotli = None +try: + from urllib.request import pathname2url +except ImportError: + from urllib import 
pathname2url + +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_str as str, + compat_urllib_error, + compat_urllib_HTTPError, + compat_urllib_parse, + compat_urllib_request, +) + +from youtube_dl.utils import ( + sanitized_Request, + urlencode_postdata, +) + from test.helper import ( + FakeYDL, FakeLogger, http_server_port, ) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server, compat_urllib_request -import ssl -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + + # work-around old/new -style class inheritance + def super(self, meth_name, *args, **kwargs): + from types import MethodType + try: + super() + fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) + except TypeError: + fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) + self.super = MethodType(fn, self) + return self.super(meth_name, *args, **kwargs) + def log_message(self, format, *args): pass + def _headers(self): + payload = str(self.headers).encode('utf-8') + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = '{0} NOT FOUND'.format(status).encode('utf-8') + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + 
self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): + return '{0}://{1}:{2}/{3}'.format( + scheme, host, + port if port is not None + else http_server_port(self.server), path) + + def do_POST(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + def do_GET(self): + + def respond(payload=b'