From 58fc5bde47215d9e7c60647dd21202a254b3b066 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 23 Jun 2023 00:15:06 +0100 Subject: [PATCH 01/47] [workflows/ci.yml] Restore test support for Py 3.3, 3.4, and add 2.6 --- .github/workflows/ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9be4eaa89..4008cc190 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,9 +8,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-20.04] - # TODO: python 2.6 - # TODO: restore support for 3.3, 3.4 - python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] python-impl: [cpython] ytdl-test-set: [core, download] run-tests-ext: [sh] From 2500300c2a5986ace34390aa473a8bd51f83622c Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 29 Jun 2023 15:27:12 +0100 Subject: [PATCH 02/47] [workflows/ci.yml] Restore test support for Py 3.2 --- .github/workflows/ci.yml | 319 +++++++++++++++++++++++++++-- devscripts/make_lazy_extractors.py | 4 + test/test_execution.py | 8 +- test/test_unicode_literals.py | 1 + youtube_dl/__init__.py | 8 +- youtube_dl/compat.py | 18 +- 6 files changed, 328 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4008cc190..8d8e654fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,73 +1,349 @@ name: CI -on: [push, pull_request] + +env: + # add 3.10+ after patching nose (https://github.com/nose-devs/nose/issues/1099) + # or switching to fork of https://github.com/mdmintz/pynose + all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9 + main-cpython-versions: 2.7, 3.2, 3.5, 3.9 + pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 + cpython-versions: all + # test-set: both + test-set: core + +on: + push: + pull_request: + workflow_dispatch: + inputs: + cpython-versions: + type: choice + description: CPython versions (main = 2.7, 3.2, 3.5, 3.9) + options: + - all + - main + required: true + default: main + test-set: + type: choice + description: core, download + options: + - both + - core + - download + required: true + default: core + +permissions: + contents: read + jobs: + select: + name: Select tests from inputs + runs-on: ubuntu-latest + outputs: + cpython-versions: ${{ steps.run.outputs.cpython-versions }} + test-set: ${{ steps.run.outputs.test-set }} + own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} + steps: + - id: run + run: | + # Make a JSON Array from comma/space-separated string (no extra escaping) + json_list() { \ + ret=""; IFS="${IFS},"; set -- $*; \ + for a in "$@"; do \ + ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \ + done; \ + printf '[%s]' "$ret"; } + tests="${{ inputs.test-set || env.test-set }}" + [ $tests = both ] && tests="core download" + printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT" + versions="${{ inputs.cpython-versions || env.cpython-versions }}" + if [ "$versions" = all ]; then \ + versions="${{ env.all-cpython-versions }}"; else \ + versions="${{ env.main-cpython-versions }}"; \ + fi + printf 'cpython-versions=%s\n' \ + "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT" + # versions with a special get-pip.py in a per-version subdirectory + printf 'own-pip-versions=%s\n' \ + "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" + tests: - name: Tests + name: Run tests + needs: select + permissions: + contents: read + packages: write runs-on: ${{ matrix.os }} strategy: fail-fast: true matrix: os: [ubuntu-20.04] - python-version: [2.6, 2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + # outside steps, use github.env...., not env.... + python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-impl: [cpython] - ytdl-test-set: [core, download] + ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} run-tests-ext: [sh] include: - # python 3.2 is only available on windows via setup-python - os: windows-2019 python-version: 3.2 python-impl: cpython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: bat - os: windows-2019 python-version: 3.2 python-impl: cpython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: bat # jython - os: ubuntu-20.04 python-impl: jython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: sh - os: ubuntu-20.04 python-impl: jython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: sh steps: - - uses: actions/checkout@v3 + - name: Checkout + uses: actions/checkout@v3 + #-------- Python 3 ----- - name: Set up supported Python ${{ matrix.python-version }} + id: setup-python + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7'}} # wrap broken actions/setup-python@v4 uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} cache-build: true allow-build: info + - name: Locate supported Python ${{ matrix.python-version }} + if: ${{ env.pythonLocation }} + shell: bash + run: | + echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV" + export expected="${{ steps.setup-python.outputs.python-path }}" + dirname() { printf '%s\n' \ + 'import os, sys' \ + 'print(os.path.dirname(sys.argv[1]))' \ + | ${expected} - "$1"; } + expd="$(dirname "$expected")" + export python="$(command -v python)" + [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV" + [ -x "$python" ] || printf '%s\n' \ + 'import os' \ + 'exp = os.environ["expected"]' \ + 'python = os.environ["python"]' \ + 'exps = os.path.split(exp)' \ + 'if python and (os.path.dirname(python) == exp[0]):' \ + ' exit(0)' \ + 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \ + 'python = os.path.join(*exps)' \ + 'try:' \ + ' os.symlink(exp, python)' \ + 'except AttributeError:' \ + ' os.rename(exp, python)' \ + | ${expected} - + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | ${expected} - + #-------- Python 2.7 -- + - name: Set up Python 2.7 + if: ${{ matrix.python-version == '2.7' }} + # install 2.7 + run: | + sudo apt-get install -y python2 python-is-python2 + echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" + #-------- Python 2.6 -- + - name: Set up Python 2.6 environment + if: ${{ matrix.python-version == '2.6' }} + run: | + openssl_name=openssl-1.0.2u + echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" + openssl_dir=$HOME/.local/opt/$openssl_name + echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV" + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + sudo apt-get install -y openssl ca-certificates + - name: Cache Python 2.6 + id: cache26 + if: ${{ matrix.python-version == '2.6' }} + uses: actions/cache@v3 + with: + key: python-2.6.9 + path: | + ${{ env.openssl_dir }} + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 2.6 + if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} + # dl and build locally + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download and install OpenSSL 1.0.2, back in time + openssl_name=${{ env.openssl_name }} + openssl_targz=${openssl_name}.tar.gz + openssl_dir=${{ env.openssl_dir }} + openssl_inc=$openssl_dir/include + openssl_lib=$openssl_dir/lib + openssl_ssl=$openssl_dir/ssl + curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz + tar -xf $openssl_targz + ( cd $openssl_name; \ + ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \ + --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \ + make && \ + make install ) + rm -rf $openssl_name + rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs + + # Download PyEnv from its GitHub repository. + export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT + eval "$(pyenv init --path)" + + # Prevent pyenv build trying (and failing) to update pip + export GET_PIP=get-pip-2.6.py + echo 'import sys; sys.exit(0)' > ${GET_PIP} + GET_PIP=$(realpath $GET_PIP) + + # Build and install Python + export CFLAGS="-I$openssl_inc" + export LDFLAGS="-L$openssl_lib" + export LD_LIBRARY_PATH="$openssl_lib" + pyenv install 2.6.9 + echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" + echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + - name: Set up cached Python 2.6 + if: ${{ steps.cache26.outputs.cache-hit }} + run: | + export PYENV_ROOT + export PATH=$PYENV_ROOT/bin:$PATH + eval "$(pyenv init --path)" + pyenv local 2.6.9 + echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" + echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} uses: actions/setup-java@v2 with: java-version: 8 distribution: 'zulu' - - name: Install Jython + - name: Setup Jython environment if: ${{ matrix.python-impl == 'jython' }} run: | - wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar - java -jar jython-installer.jar -s -d "$HOME/jython" - echo "$HOME/jython/bin" >> $GITHUB_PATH - - name: Install nose - if: ${{ matrix.python-impl != 'jython' }} - run: pip install nose + echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + - name: Cache Jython + id: cachejy + if: ${{ matrix.python-impl == 'jython' }} + uses: actions/cache@v3 + with: + # 2.7.3 now available, may solve SNI issue + key: jython-2.7.1 + path: | + ${{ env.JYTHON_ROOT }} + - name: Install Jython + if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }} + run: | + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar + java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + - name: Set up cached Jython + if: ${{ steps.cachejy.outputs.cache-hit }} + run: | + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + #-------- pip --------- + - name: Set up supported Python ${{ matrix.python-version }} pip + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + # This step may run in either Linux or Windows + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # curl is available on both Windows and Linux, -L follows redirects, -O gets name + python -m ensurepip || python -m pip --version || { \ + get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ + curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ + python get-pip.py; } + - name: Set up other Python ${{ matrix.python-version }} pip + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl + # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ + python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } + + #-------- nose -------- + - name: Install nose for Python ${{ matrix.python-version }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + python --version + python -m pip --version + python -m pip nose --version || python -m pip install nose + - name: Install nose for other Python ${{ matrix.python-version }} + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + python -m pip nose --version || { \ + curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ + python --version; \ + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | python -; \ + python -m pip --version; \ + python -m pip install nose-1.3.7-py3-none-any.whl; } - name: Install nose (Jython) if: ${{ matrix.python-impl == 'jython' }} - # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) run: | - wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl - pip install nose-1.3.7-py2-none-any.whl + pip nose --version || { \ + curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + pip --version; \ + pip install nose-1.3.7-py2-none-any.whl; } + - name: Set up nosetest test + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} + shell: bash + run: | + # define a test to validate the Python version used by nosetests + printf '%s\n' \ + 'from __future__ import unicode_literals' \ + 'import sys, os, platform, unittest' \ + 'class TestPython(unittest.TestCase):' \ + ' def setUp(self):' \ + ' self.ver = os.environ["PYTHON_VER"].split("-")' \ + ' def test_python_ver(self):' \ + ' self.assertEqual(sys.version[:3], self.ver[-1])' \ + ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ + ' self.assertIn(self.ver[0], sys.version.lower())' \ + ' def test_python_impl(self):' \ + ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ + > test/test_python.py + #-------- TESTS ------- - name: Run tests + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} env: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} + PYTHON_VER: ${{ matrix.python-version }} + PYTHON_IMPL: ${{ matrix.python-impl }} + + run: | + ./devscripts/run_tests.${{ matrix.run-tests-ext }} + flake8: name: Linter runs-on: ubuntu-latest @@ -81,3 +357,4 @@ jobs: run: pip install flake8 - name: Run flake8 run: flake8 . + diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index edc19183d..4bddca047 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -6,6 +6,10 @@ import os from os.path import dirname as dirn import sys +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() + print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) diff --git a/test/test_execution.py b/test/test_execution.py index 704e14612..1dee53a0f 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -10,10 +10,13 @@ import os import subprocess sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from youtube_dl.compat import compat_register_utf8 + from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +compat_register_utf8() try: _DEV_NULL = subprocess.DEVNULL @@ -25,13 +28,14 @@ class TestExecution(unittest.TestCase): def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) + @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): p = subprocess.Popen( [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py index 6c1b7ec91..c7c2252f5 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -15,6 +15,7 @@ IGNORED_FILES = [ 'setup.py', # http://bugs.python.org/issue13943 'conf.py', 'buildserver.py', + 'get-pip.py', ] IGNORED_DIRS = [ diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e1bd67919..cc8285eba 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -5,7 +5,6 @@ from __future__ import unicode_literals __license__ = 'Public Domain' -import codecs import io import os import random @@ -17,6 +16,7 @@ from .options import ( ) from .compat import ( compat_getpass, + compat_register_utf8, compat_shlex_split, workaround_optparse_bug9161, ) @@ -46,10 +46,8 @@ from .YoutubeDL import YoutubeDL def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) + # Compatibility fix for Windows + compat_register_utf8() workaround_optparse_bug9161() diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index fe62caf80..0f4d3756f 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -31,13 +31,17 @@ try: compat_str, compat_basestring, compat_chr = ( unicode, basestring, unichr ) - from .casefold import casefold as compat_casefold - except NameError: compat_str, compat_basestring, compat_chr = ( str, str, chr ) + +# casefold +try: + compat_str.casefold compat_casefold = lambda s: s.casefold() +except AttributeError: + from .casefold import casefold as compat_casefold try: import collections.abc as compat_collections_abc @@ -3137,6 +3141,15 @@ else: compat_open = open +# compat_register_utf8 +def compat_register_utf8(): + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + from codecs import register, lookup + register( + lambda name: lookup('utf-8') if name == 'cp65001' else None) + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3203,6 +3216,7 @@ __all__ = [ 'compat_print', 'compat_re_Match', 'compat_re_Pattern', + 'compat_register_utf8', 'compat_setenv', 'compat_shlex_quote', 'compat_shlex_split', From b08a58090635777f1001d5cde2cd141a5565177c Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 30 Jun 2023 03:52:39 +0100 Subject: [PATCH 03/47] [workflows/ci.yml] Fix test support for Py 2.6 --- .github/workflows/ci.yml | 115 ++++++++++++++++++----------- devscripts/make_lazy_extractors.py | 8 +- test/test_execution.py | 16 ++-- 3 files changed, 83 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d8e654fb..ce878c1b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,9 +6,8 @@ env: all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9 main-cpython-versions: 2.7, 3.2, 3.5, 3.9 pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 - cpython-versions: all - # test-set: both - test-set: core + cpython-versions: main + test-set: both on: push: @@ -75,6 +74,10 @@ jobs: contents: read packages: write runs-on: ${{ matrix.os }} + env: + PIP: python -m pip + PIP_DISABLE_PIP_VERSION_CHECK: true + PIP_NO_PYTHON_VERSION_WARNING: true strategy: fail-fast: true matrix: @@ -152,12 +155,14 @@ jobs: - name: Set up Python 2.7 if: ${{ matrix.python-version == '2.7' }} # install 2.7 + shell: bash run: | sudo apt-get install -y python2 python-is-python2 echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" #-------- Python 2.6 -- - name: Set up Python 2.6 environment if: ${{ matrix.python-version == '2.6' }} + shell: bash run: | openssl_name=openssl-1.0.2u echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" @@ -178,6 +183,7 @@ jobs: - name: Build and set up Python 2.6 if: ${{ matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }} # dl and build locally + shell: bash run: | # Install build environment sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ @@ -203,8 +209,7 @@ jobs: # Download PyEnv from its GitHub repository. export PYENV_ROOT=${{ env.PYENV_ROOT }} export PATH=$PYENV_ROOT/bin:$PATH - git clone https://github.com/pyenv/pyenv.git $PYENV_ROOT - eval "$(pyenv init --path)" + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" # Prevent pyenv build trying (and failing) to update pip export GET_PIP=get-pip-2.6.py @@ -216,17 +221,14 @@ jobs: export LDFLAGS="-L$openssl_lib" export LD_LIBRARY_PATH="$openssl_lib" pyenv install 2.6.9 - echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" - echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" - - name: Set up cached Python 2.6 - if: ${{ steps.cache26.outputs.cache-hit }} + - name: Locate Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash run: | - export PYENV_ROOT - export PATH=$PYENV_ROOT/bin:$PATH - eval "$(pyenv init --path)" - pyenv local 2.6.9 - echo "PYTHONHOME=${PYENV_ROOT}" >> "$GITHUB_ENV" - echo "PATH=$PYENV_ROOT/bin:$PATH" >> "$GITHUB_ENV" + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} @@ -236,8 +238,10 @@ jobs: distribution: 'zulu' - name: Setup Jython environment if: ${{ matrix.python-impl == 'jython' }} + shell: bash run: | echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + echo "PIP=pip" >> "$GITHUB_ENV" - name: Cache Jython id: cachejy if: ${{ matrix.python-impl == 'jython' }} @@ -249,19 +253,21 @@ jobs: ${{ env.JYTHON_ROOT }} - name: Install Jython if: ${{ matrix.python-impl == 'jython' && ! steps.cachejy.outputs.cache-hit }} + shell: bash run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" - echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH" - name: Set up cached Jython if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash run: | JYTHON_ROOT="${{ env.JYTHON_ROOT }}" echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH #-------- pip --------- - name: Set up supported Python ${{ matrix.python-version }} pip - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} # This step may run in either Linux or Windows shell: bash run: | @@ -272,48 +278,66 @@ jobs: get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ python get-pip.py; } + - name: Set up Python 2.6 pip + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; } + # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751 + echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV" - name: Set up other Python ${{ matrix.python-version }} pip if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} shell: bash run: | - # https://files.pythonhosted.org/packages/8a/e9/8468cd68b582b06ef554be0b96b59f59779627131aad48f8a5bce4b13450/wheel-0.29.0-py2.py3-none-any.whl - # https://files.pythonhosted.org/packages/06/4b/86a670fd21f7849adb092e40883c48dcd0d66b8a878fc8d63b7f0ea04213/setuptools-29.0.1-py2.py3-none-any.whl python -m pip --version || { \ curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ - python -v get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } - + python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } + #-------- unittest ---- + - name: Upgrade Unittest for Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + # see pip for Jython + $PIP -qq show unittest2 || { \ + for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \ + "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \ + "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \ + "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \ + "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \ + curl -L -O "https://files.pythonhosted.org/packages/${u}"; \ + $PIP install ${u##*/}; \ + done; } + # make tests use unittest2 + for test in ./test/test_*.py; do + sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test" + done #-------- nose -------- - name: Install nose for Python ${{ matrix.python-version }} - if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.6' || matrix.python-version == '2.7' }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} shell: bash run: | echo "$PATH" echo "$PYTHONHOME" - python --version - python -m pip --version - python -m pip nose --version || python -m pip install nose - - name: Install nose for other Python ${{ matrix.python-version }} + $PIP -qq show nose || $PIP install nose + - name: Install nose for other Python 2 + if: ${{ matrix.python-impl == 'jython' || matrix.python-version == '2.6' }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show nose || { \ + curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + $PIP install nose-1.3.7-py2-none-any.whl; } + - name: Install nose for other Python 3 if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} shell: bash run: | - python -m pip nose --version || { \ + $PIP -qq show nose || { \ curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ - python --version; \ - printf '%s\n' \ - 'import sys' \ - 'print(sys.path)' \ - | python -; \ - python -m pip --version; \ - python -m pip install nose-1.3.7-py3-none-any.whl; } - - name: Install nose (Jython) - if: ${{ matrix.python-impl == 'jython' }} - # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) - run: | - pip nose --version || { \ - curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ - pip --version; \ - pip install nose-1.3.7-py2-none-any.whl; } + $PIP install nose-1.3.7-py3-none-any.whl; } - name: Set up nosetest test if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} shell: bash @@ -321,7 +345,11 @@ jobs: # define a test to validate the Python version used by nosetests printf '%s\n' \ 'from __future__ import unicode_literals' \ - 'import sys, os, platform, unittest' \ + 'import sys, os, platform' \ + 'try:' \ + ' import unittest2 as unittest' \ + 'except ImportError:' \ + ' import unittest' \ 'class TestPython(unittest.TestCase):' \ ' def setUp(self):' \ ' self.ver = os.environ["PYTHON_VER"].split("-")' \ @@ -340,7 +368,6 @@ jobs: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} PYTHON_VER: ${{ matrix.python-version }} PYTHON_IMPL: ${{ matrix.python-impl }} - run: | ./devscripts/run_tests.${{ matrix.run-tests-ext }} diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 4bddca047..a8b6ff1b9 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -6,10 +6,6 @@ import os from os.path import dirname as dirn import sys -from youtube_dl.compat import compat_register_utf8 - -compat_register_utf8() - print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) @@ -23,6 +19,10 @@ try: except OSError: pass +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() + from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor diff --git a/test/test_execution.py b/test/test_execution.py index 1dee53a0f..35e7a5651 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -11,13 +11,12 @@ import subprocess sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import compat_register_utf8 - from youtube_dl.utils import encodeArgument -rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - compat_register_utf8() +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + try: _DEV_NULL = subprocess.DEVNULL except AttributeError: @@ -33,21 +32,22 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): + os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) def test_lazy_extractors(self): - lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py' + lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py') try: - subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) - subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL) finally: for x in ['', 'c'] if sys.version_info[0] < 3 else ['']: try: From f24bc9272e9b74efc4c4af87c862f5f78921d424 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 4 Jul 2023 16:06:21 +0100 Subject: [PATCH 04/47] [Misc] Fixes for 2.6 compatibility --- test/test_jsinterp.py | 10 ++++++---- test/test_utils.py | 2 +- youtube_dl/YoutubeDL.py | 6 +++++- youtube_dl/compat.py | 12 ++++++++++++ youtube_dl/jsinterp.py | 13 ++++++++++++- youtube_dl/utils.py | 3 ++- 6 files changed, 38 insertions(+), 8 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index ecd6ab3c9..91b12f544 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -492,10 +492,12 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; } ''') - attrs = set(('findall', 'finditer', 'flags', 'groupindex', - 'groups', 'match', 'pattern', 'scanner', - 'search', 'split', 'sub', 'subn')) - self.assertTrue(set(dir(jsi.call_function('x'))) > attrs) + attrs = set(('findall', 'finditer', 'match', 'scanner', 'search', + 'split', 'sub', 'subn')) + if sys.version_info >= (2, 7): + # documented for 2.6 but may not be found + attrs.update(('flags', 'groupindex', 'groups', 'pattern')) + self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs) jsi = JSInterpreter(''' function x() { let a=/,,[/,913,/](,)}/i; return a; } diff --git a/test/test_utils.py b/test/test_utils.py index b85d397d0..5fab05f7c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1612,7 +1612,7 @@ Line 1 self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), [_TEST_DATA['urls']], msg='function as query key should perform a filter based on (key, value)') - self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'}, + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',), msg='exceptions in the query function should be caught') # Test alternative paths diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 98b878fc1..068029d3e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -25,7 +25,11 @@ import tokenize import traceback import random -from ssl import OPENSSL_VERSION +try: + from ssl import OPENSSL_VERSION +except ImportError: + # Must be Python 2.6, should be built against 1.0.2 + OPENSSL_VERSION = 'OpenSSL 1.0.2(?)' from string import ascii_letters from .compat import ( diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0f4d3756f..2554fd1c3 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,10 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals +from __future__ import division import base64 import binascii import collections import ctypes +import datetime import email import getpass import io @@ -3150,6 +3152,15 @@ def compat_register_utf8(): lambda name: lookup('utf-8') if name == 'cp65001' else None) +# compat_datetime_timedelta_total_seconds +try: + compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds +except AttributeError: + # Py 2.6 + def compat_datetime_timedelta_total_seconds(td): + return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', @@ -3187,6 +3198,7 @@ __all__ = [ 'compat_chr', 'compat_collections_abc', 'compat_collections_chain_map', + 'compat_datetime_timedelta_total_seconds', 'compat_http_cookiejar', 'compat_http_cookiejar_Cookie', 'compat_http_cookies', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 1ba9c3d67..882432b80 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -277,9 +277,20 @@ class JSInterpreter(object): def __getattr__(self, name): self.__instantiate() + # make Py 2.6 conform to its lying documentation + if name == 'flags': + self.flags = self.__flags + elif name == 'pattern': + self.pattern = self.__pattern_txt + elif name in ('groupindex', 'groups'): + # in case these get set after a match? + if hasattr(self.__self, name): + setattr(self, name, getattr(self.__self, name)) + else: + return 0 if name == 'groupindex' else {} if hasattr(self, name): return getattr(self, name) - return super(JSInterpreter.JS_RegExp, self).__getattr__(name) + raise AttributeError('{0} has no attribute named {1}'.format(self, name)) @classmethod def regex_flags(cls, expr): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 584581b6a..83f67bd95 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_collections_abc, compat_cookiejar, compat_ctypes_WINFUNCTYPE, + compat_datetime_timedelta_total_seconds, compat_etree_fromstring, compat_expanduser, compat_html_entities, @@ -3102,7 +3103,7 @@ def unified_timestamp(date_str, day_first=True): pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() + return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) def determine_ext(url, default_ext='unknown_video'): From b6dff4073d469cceadb099c00ccbf3bd6fc515a6 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:41:32 +0100 Subject: [PATCH 05/47] [core] Revert version display from b8a86dc --- youtube_dl/YoutubeDL.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 068029d3e..4e7fd1063 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2378,10 +2378,12 @@ class YoutubeDL(object): self.get_encoding())) write_string(encoding_str, encoding=None) - self._write_string('[debug] youtube-dl version ' + __version__ + (' (single file build)\n' if ytdl_is_updateable() else '\n')) + writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) + writeln_debug('youtube-dl version ', __version__) if _LAZY_LOADER: - self._write_string('[debug] Lazy loading extractors enabled\n') - writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), )) # moved down for easier merge + writeln_debug('Lazy loading extractors enabled') + if ytdl_is_updateable(): + writeln_debug('Single file build') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], From f47fdb9564d3ca1c0fa70ed6031148ec908fdc7b Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 6 Jul 2023 15:46:22 +0100 Subject: [PATCH 06/47] [utils] Add {expected_type} and Iterable support to traverse_obj() --- test/test_utils.py | 153 ++++++++++++++++++++++++++------ youtube_dl/utils.py | 211 +++++++++++++++++++++++++++++--------------- 2 files changed, 265 insertions(+), 99 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5fab05f7c..1fc16ed05 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -79,10 +79,12 @@ from youtube_dl.utils import ( rot47, shell_quote, smuggle_url, + str_or_none, str_to_int, strip_jsonp, strip_or_none, subtitles_filename, + T, timeconvert, traverse_obj, try_call, @@ -1566,6 +1568,7 @@ Line 1 self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') def test_traverse_obj(self): + str = compat_str _TEST_DATA = { 100: 100, 1.2: 1.2, @@ -1598,8 +1601,8 @@ Line 1 # Test Ellipsis behavior self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), - (item for item in _TEST_DATA.values() if item is not None), - msg='`...` should give all values except `None`') + (item for item in _TEST_DATA.values() if item not in (None, {})), + msg='`...` should give all non discarded values') self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), msg='`...` selection for dicts should select all values') self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), @@ -1607,13 +1610,51 @@ Line 1 msg='nested `...` queries should work') self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4), msg='`...` query result should be flattened') + self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)), + msg='`...` should accept iterables') # Test function as key self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), [_TEST_DATA['urls']], msg='function as query key should perform a filter based on (key, value)') - self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), ('str',), - msg='exceptions in the query function should be caught') + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'}, + msg='exceptions in the query function should be catched') + self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2], + msg='function key should accept iterables') + if __debug__: + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a: Ellipsis) + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis) + + # Test set as key (transformation/type, like `expected_type`) + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'], + msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'], + msg='Type in set should be a type filter') + self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA, + msg='A single set should be wrapped into a path') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'], + msg='Transformation function should not raise') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') + if __debug__: + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, set()) + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, {str.upper, str}) + + # Test `slice` as a key + _SLICE_DATA = [0, 1, 2, 3, 4] + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None, + msg='slice on a dictionary should not throw') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2], + msg='slice key should apply slice to sequence') # Test alternative paths self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', @@ -1659,15 +1700,23 @@ Line 1 {0: ['https://www.example.com/1', 'https://www.example.com/0']}, msg='triple nesting in dict path should be treated as branches') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, - msg='remove `None` values when dict key') + msg='remove `None` values when top level dict key fails') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, - msg='do not remove `None` values if `default`') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}}, - msg='do not remove empty values when dict key') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}}, - msg='do not remove empty values when dict key and a default') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []}, - msg='if branch in dict key not successful, return `[]`') + msg='use `default` if key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {}, + msg='remove empty values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis}, + msg='use `default` when dict key and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {}, + msg='remove empty values when nested dict key fails') + self.assertEqual(traverse_obj(None, {0: 'fail'}), {}, + msg='default to dict if pruned') + self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, + msg='default to dict if pruned and default is given') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}}, + msg='use nested `default` when nested dict key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {}, + msg='remove key if branch in dict key not successful') # Testing default parameter behavior _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} @@ -1691,20 +1740,55 @@ Line 1 msg='if branched but not successful return `[]`, not `default`') self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [], msg='if branched but object is empty return `[]`, not `default`') + self.assertEqual(traverse_obj(None, Ellipsis), [], + msg='if branched but object is `None` return `[]`, not `default`') + self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [], + msg='if branched but state is `None` return `[]`, not `default`') + + branching_paths = [ + ('fail', Ellipsis), + (Ellipsis, 'fail'), + 100 * ('fail',) + (Ellipsis,), + (Ellipsis,) + 100 * ('fail',), + ] + for branching_path in branching_paths: + self.assertEqual(traverse_obj({}, branching_path), [], + msg='if branched but state is `None`, return `[]` (not `default`)') + self.assertEqual(traverse_obj({}, 'fail', branching_path), [], + msg='if branching in last alternative and previous did not match, return `[]` (not `default`)') + self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x', + msg='if branching in last alternative and previous did match, return single value') + self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x', + msg='if branching in first alternative and non-branching path does match, return single value') + self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, + msg='if branching in first alternative and non-branching path does not match, return `default`') # Testing expected_type behavior _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str', - msg='accept matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None, - msg='reject non matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0', - msg='transform type using type function') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', - expected_type=lambda _: 1 / 0), None, - msg='wrap expected_type function in try_call') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'], - msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), + 'str', msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), + None, msg='reject non matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), + '0', msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), + None, msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str), + ['str'], msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), + {0: 100}, msg='type as expected_type should filter dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), + {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int), + 1, msg='expected_type should not filter non final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)), + [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int), + [], msg='expected_type regression for type matching in dict result') # Test get_all behavior _GET_ALL_DATA = {'key': [0, 1, 2]} @@ -1749,14 +1833,23 @@ Line 1 _traverse_string=True), '.', msg='traverse into converted data if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), - _traverse_string=True), list('str'), - msg='`...` branching into string should result in list') + _traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + _traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), + _traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), _traverse_string=True), ['s', 'r'], - msg='branching into string should result in list') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x), - _traverse_string=True), list('str'), - msg='function branching into string should result in list') + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [], + msg='branching should result in list if `traverse_string`') # Test is_user_input behavior _IS_USER_INPUT_DATA = {'range8': list(range(8))} @@ -1793,6 +1886,8 @@ Line 1 msg='failing str key on a `re.Match` should return `default`') self.assertEqual(traverse_obj(mobj, 8), None, msg='failing int key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], + msg='function on a `re.Match` should give group name as well') def test_get_first(self): self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 83f67bd95..dbdbe5f59 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -16,6 +16,7 @@ import email.header import errno import functools import gzip +import inspect import io import itertools import json @@ -3881,7 +3882,7 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized -class LazyList(compat_collections_abc.Sequence): +class LazyList(compat_collections_abc.Iterable): """Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList""" @@ -4223,10 +4224,16 @@ def multipart_encode(data, boundary=None): return out, content_type -def variadic(x, allowed_types=(compat_str, bytes, dict)): - if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable): +def is_iterable_like(x, allowed_types=compat_collections_abc.Iterable, blocked_types=NO_DEFAULT): + if blocked_types is NO_DEFAULT: + blocked_types = (compat_str, bytes, compat_collections_abc.Mapping) + return isinstance(x, allowed_types) and not isinstance(x, blocked_types) + + +def variadic(x, allowed_types=NO_DEFAULT): + if isinstance(allowed_types, compat_collections_abc.Iterable): allowed_types = tuple(allowed_types) - return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) + return x if is_iterable_like(x, blocked_types=allowed_types) else (x,) def dict_get(d, key_or_keys, default=None, skip_false_values=True): @@ -5993,7 +6000,7 @@ def clean_podcast_url(url): def traverse_obj(obj, *paths, **kwargs): """ - Safely traverse nested `dict`s and `Sequence`s + Safely traverse nested `dict`s and `Iterable`s >>> obj = [{}, {"key": "value"}] >>> traverse_obj(obj, (1, "key")) @@ -6001,14 +6008,17 @@ def traverse_obj(obj, *paths, **kwargs): Each of the provided `paths` is tested and the first producing a valid result will be returned. The next path will also be tested if the path branched but no results could be found. - Supported values for traversal are `Mapping`, `Sequence` and `re.Match`. - A value of None is treated as the absence of a value. + Supported values for traversal are `Mapping`, `Iterable` and `re.Match`. + Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. The keys in the path can be one of: - `None`: Return the current object. - - `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`. + - `set`: Requires the only item in the set to be a type or function, + like `{type}`/`{func}`. If a `type`, returns only values + of this type. If a function, returns `func(obj)`. + - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. - `slice`: Branch out and return all values in `obj[key]`. - `Ellipsis`: Branch out and return a list of all values. - `tuple`/`list`: Branch out and return a list of all matching values. @@ -6016,6 +6026,9 @@ def traverse_obj(obj, *paths, **kwargs): - `function`: Branch out and return values filtered by the function. Read as: `[value for key, value in obj if function(key, value)]`. For `Sequence`s, `key` is the index of the value. + For `Iterable`s, `key` is the enumeration count of the value. + For `re.Match`es, `key` is the group number (0 = full match) + as well as additionally any group names, if given. - `dict` Transform the current object and return a matching dict. Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. @@ -6024,8 +6037,12 @@ def traverse_obj(obj, *paths, **kwargs): @params paths Paths which to traverse by. Keyword arguments: @param default Value to return if the paths do not match. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, depth first. Try to avoid if using nested `dict` keys. @param expected_type If a `type`, only accept final values of this type. If any other callable, try to call the function on each result. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, recursively. This does respect branching paths. @param get_all If `False`, return the first matching result, otherwise all matching ones. @param casesense If `False`, consider string dictionary keys as case insensitive. @@ -6036,12 +6053,15 @@ def traverse_obj(obj, *paths, **kwargs): @param _traverse_string Whether to traverse into objects as strings. If `True`, any non-compatible object will first be converted into a string and then traversed into. + The return value of that path will be a string instead, + not respecting any further branching. @returns The result of the object traversal. If successful, `get_all=True`, and the path branches at least once, then a list of results is returned instead. A list is always returned if the last path branches and no `default` is given. + If a path ends on a `dict` that result will always be a `dict`. """ # parameter defaults @@ -6055,7 +6075,6 @@ def traverse_obj(obj, *paths, **kwargs): # instant compat str = compat_str - is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes)) casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k if isinstance(expected_type, type): @@ -6063,128 +6082,180 @@ def traverse_obj(obj, *paths, **kwargs): else: type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) + def lookup_or_none(v, k, getter=None): + try: + return getter(v, k) if getter else v[k] + except IndexError: + return None + def from_iterable(iterables): # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F for it in iterables: for item in it: yield item - def apply_key(key, obj): - if obj is None: - return + def apply_key(key, obj, is_last): + branching = False + + if obj is None and _traverse_string: + if key is Ellipsis or callable(key) or isinstance(key, slice): + branching = True + result = () + else: + result = None elif key is None: - yield obj + result = obj + + elif isinstance(key, set): + assert len(key) == 1, 'Set should only be used to wrap a single item' + item = next(iter(key)) + if isinstance(item, type): + result = obj if isinstance(obj, item) else None + else: + result = try_call(item, args=(obj,)) elif isinstance(key, (list, tuple)): - for branch in key: - _, result = apply_path(obj, branch) - for item in result: - yield item + branching = True + result = from_iterable( + apply_path(obj, branch, is_last)[0] for branch in key) elif key is Ellipsis: - result = [] + branching = True if isinstance(obj, compat_collections_abc.Mapping): result = obj.values() - elif is_sequence(obj): + elif is_iterable_like(obj): result = obj elif isinstance(obj, compat_re_Match): result = obj.groups() elif _traverse_string: + branching = False result = str(obj) - for item in result: - yield item + else: + result = () elif callable(key): - if is_sequence(obj): - iter_obj = enumerate(obj) - elif isinstance(obj, compat_collections_abc.Mapping): + branching = True + if isinstance(obj, compat_collections_abc.Mapping): iter_obj = obj.items() + elif is_iterable_like(obj): + iter_obj = enumerate(obj) elif isinstance(obj, compat_re_Match): - iter_obj = enumerate(itertools.chain([obj.group()], obj.groups())) + iter_obj = itertools.chain( + enumerate(itertools.chain((obj.group(),), obj.groups())), + obj.groupdict().items()) elif _traverse_string: + branching = False iter_obj = enumerate(str(obj)) else: - return - for item in (v for k, v in iter_obj if try_call(key, args=(k, v))): - yield item + iter_obj = () + + result = (v for k, v in iter_obj if try_call(key, args=(k, v))) + if not branching: # string traversal + result = ''.join(result) elif isinstance(key, dict): - iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items()) - yield dict((k, v if v is not None else default) for k, v in iter_obj - if v is not None or default is not NO_DEFAULT) + iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items()) + result = dict((k, v if v is not None else default) for k, v in iter_obj + if v is not None or default is not NO_DEFAULT) or None elif isinstance(obj, compat_collections_abc.Mapping): - yield (obj.get(key) if casesense or (key in obj) - else next((v for k, v in obj.items() if casefold(k) == key), None)) + result = (try_call(obj.get, args=(key,)) + if casesense or try_call(obj.__contains__, args=(key,)) + else next((v for k, v in obj.items() if casefold(k) == key), None)) elif isinstance(obj, compat_re_Match): + result = None if isinstance(key, int) or casesense: - try: - yield obj.group(key) - return - except IndexError: - pass - if not isinstance(key, str): - return + result = lookup_or_none(obj, key, getter=compat_re_Match.group) - yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) + elif isinstance(key, str): + result = next((v for k, v in obj.groupdict().items() + if casefold(k) == key), None) else: - if _is_user_input: - key = (int_or_none(key) if ':' not in key - else slice(*map(int_or_none, key.split(':')))) + result = None + if isinstance(key, (int, slice)): + if is_iterable_like(obj, compat_collections_abc.Sequence): + branching = isinstance(key, slice) + result = lookup_or_none(obj, key) + elif _traverse_string: + result = lookup_or_none(str(obj), key) - if not isinstance(key, (int, slice)): - return + return branching, result if branching else (result,) - if not is_sequence(obj): - if not _traverse_string: - return - obj = str(obj) + def lazy_last(iterable): + iterator = iter(iterable) + prev = next(iterator, NO_DEFAULT) + if prev is NO_DEFAULT: + return - try: - yield obj[key] - except IndexError: - pass + for item in iterator: + yield False, prev + prev = item - def apply_path(start_obj, path): + yield True, prev + + def apply_path(start_obj, path, test_type): objs = (start_obj,) has_branched = False - for key in variadic(path): - if _is_user_input and key == ':': - key = Ellipsis + key = None + for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): + if _is_user_input and isinstance(key, str): + if key == ':': + key = Ellipsis + elif ':' in key: + key = slice(*map(int_or_none, key.split(':'))) + elif int_or_none(key) is not None: + key = int(key) if not casesense and isinstance(key, str): key = compat_casefold(key) - if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key): - has_branched = True + if __debug__ and callable(key): + # Verify function signature + inspect.getcallargs(key, None, None) - key_func = functools.partial(apply_key, key) - objs = from_iterable(map(key_func, objs)) + new_objs = [] + for obj in objs: + branching, results = apply_key(key, obj, last) + has_branched |= branching + new_objs.append(results) - return has_branched, objs + objs = from_iterable(new_objs) - def _traverse_obj(obj, path, use_list=True): - has_branched, results = apply_path(obj, path) - results = LazyList(x for x in map(type_test, results) if x is not None) + if test_type and not isinstance(key, (dict, list, tuple)): + objs = map(type_test, objs) + + return objs, has_branched, isinstance(key, dict) + + def _traverse_obj(obj, path, allow_empty, test_type): + results, has_branched, is_dict = apply_path(obj, path, test_type) + results = LazyList(x for x in results if x not in (None, {})) if get_all and has_branched: - return results.exhaust() if results or use_list else None + if results: + return results.exhaust() + if allow_empty: + return [] if default is NO_DEFAULT else default + return None - return results[0] if results else None + return results[0] if results else {} if allow_empty and is_dict else None for index, path in enumerate(paths, 1): - use_list = default is NO_DEFAULT and index == len(paths) - result = _traverse_obj(obj, path, use_list) + result = _traverse_obj(obj, path, index == len(paths), True) if result is not None: return result return None if default is NO_DEFAULT else default +def T(x): + """ For use in yt-dl instead of {type} or set((type,)) """ + return set((x,)) + + def get_first(obj, keys, **kwargs): return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs) From d5ef405c5d533c85cebd205a5b7958614c7013f3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:45:31 +0100 Subject: [PATCH 07/47] [core] Align error reporting methods with yt-dlp --- test/helper.py | 3 ++- test/test_YoutubeDL.py | 10 ++-------- youtube_dl/YoutubeDL.py | 39 ++++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/test/helper.py b/test/helper.py index 883b2e877..e3314b03e 100644 --- a/test/helper.py +++ b/test/helper.py @@ -72,7 +72,8 @@ class FakeYDL(YoutubeDL): def to_screen(self, s, skip_eol=None): print(s) - def trouble(self, s, tb=None): + def trouble(self, *args, **kwargs): + s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message') raise Exception(s) def download(self, x): diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8c8e619c..60780b8a7 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -930,17 +930,11 @@ class TestYoutubeDL(unittest.TestCase): # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): - class _YDL(YDL): - def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) - - def trouble(self, s, tb=None): - pass - - ydl = _YDL({ + ydl = YDL({ 'format': 'extra', 'ignoreerrors': True, }) + ydl.trouble = lambda *_, **__: None class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P\d+)' diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4e7fd1063..1435754c2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -582,7 +582,7 @@ class YoutubeDL(object): if self.params.get('cookiefile') is not None: self.cookiejar.save(ignore_discard=True, ignore_expires=True) - def trouble(self, message=None, tb=None): + def trouble(self, *args, **kwargs): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore @@ -591,6 +591,11 @@ class YoutubeDL(object): tb, if given, is additional traceback information. """ + # message=None, tb=None, is_error=True + message = args[0] if len(args) > 0 else kwargs.get('message', None) + tb = args[1] if len(args) > 1 else kwargs.get('tb', None) + is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True) + if message is not None: self.to_stderr(message) if self.params.get('verbose'): @@ -603,7 +608,10 @@ class YoutubeDL(object): else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) - self.to_stderr(tb) + if tb: + self.to_stderr(tb) + if not is_error: + return if not self.params.get('ignoreerrors', False): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info @@ -612,11 +620,18 @@ class YoutubeDL(object): raise DownloadError(message, exc_info) self._download_retcode = 1 - def report_warning(self, message): + def report_warning(self, message, only_once=False, _cache={}): ''' Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' + if only_once: + m_hash = hash((self, message)) + m_cnt = _cache.setdefault(m_hash, 0) + _cache[m_hash] = m_cnt + 1 + if m_cnt > 0: + return + if self.params.get('logger') is not None: self.params['logger'].warning(message) else: @@ -629,7 +644,7 @@ class YoutubeDL(object): warning_message = '%s %s' % (_msg_header, message) self.to_stderr(warning_message) - def report_error(self, message, tb=None): + def report_error(self, message, *args, **kwargs): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. @@ -638,8 +653,18 @@ class YoutubeDL(object): _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) + kwargs['message'] = '%s %s' % (_msg_header, message) + self.trouble(*args, **kwargs) + + def report_unscoped_cookies(self, *args, **kwargs): + # message=None, tb=False, is_error=False + if len(args) <= 2: + kwargs.setdefault('is_error', False) + if len(args) <= 0: + kwargs.setdefault( + 'message', + 'Unscoped cookies are not allowed: please specify some sort of scoping') + self.report_error(*args, **kwargs) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -835,7 +860,7 @@ class YoutubeDL(object): msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' self.report_error(msg) except ExtractorError as e: # An error we somewhat expected - self.report_error(compat_str(e), e.format_traceback()) + self.report_error(compat_str(e), tb=e.format_traceback()) except MaxDownloadsReached: raise except Exception as e: From 1720c04dc56fa0d2caa0a455b1acbd569347482e Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 17 Jul 2023 20:47:58 +0100 Subject: [PATCH 08/47] [test] Make skipped tests in test_execution work with Py 2.6 --- test/test_execution.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_execution.py b/test/test_execution.py index 35e7a5651..ae59e562a 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -24,21 +24,24 @@ except AttributeError: class TestExecution(unittest.TestCase): + def setUp(self): + self.module = 'youtube_dl' + if sys.version_info < (2, 7): + self.module += '.__main__' + def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) - @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_module_exec(self): - subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) - @unittest.skipIf(sys.version_info < (2, 7), 'Python 2.6 doesn\'t support package execution') def test_cmdline_umlauts(self): os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, os.path.normpath('youtube_dl/__main__.py'), encodeArgument('ä'), '--version'], + [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) From 648dc5304cb2476592ff142988b8c62675011fcc Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Jul 2023 18:51:38 +0100 Subject: [PATCH 09/47] [compat] Add Request and HTTPClient compat for redirect * support `method` parameter of `Request.__init__` (Py 2 and old Py 3) * support `getcode` method of compat_http_client.HTTPResponse (Py 2) --- youtube_dl/compat.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 2554fd1c3..cd11ba5aa 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,7 @@ import socket import struct import subprocess import sys +import types import xml.etree.ElementTree # naming convention @@ -55,6 +56,22 @@ try: except ImportError: # Python 2 import urllib2 as compat_urllib_request +# Also fix up lack of method arg in old Pythons +try: + _req = compat_urllib_request.Request + _req('http://127.0.0.1', method='GET') +except TypeError: + class _request(object): + def __new__(cls, url, *args, **kwargs): + method = kwargs.pop('method', None) + r = _req(url, *args, **kwargs) + if method: + r.get_method = types.MethodType(lambda _: method, r) + return r + + compat_urllib_request.Request = _request + + try: import urllib.error as compat_urllib_error except ImportError: # Python 2 @@ -79,6 +96,12 @@ try: except ImportError: # Python 2 import urllib as compat_urllib_response +try: + compat_urllib_response.addinfourl.status +except AttributeError: + # .getcode() is deprecated in Py 3. + compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) + try: import http.cookiejar as compat_cookiejar except ImportError: # Python 2 @@ -2360,6 +2383,11 @@ try: import http.client as compat_http_client except ImportError: # Python 2 import httplib as compat_http_client +try: + compat_http_client.HTTPResponse.getcode +except AttributeError: + # Py < 3.1 + compat_http_client.HTTPResponse.getcode = lambda self: self.status try: from urllib.error import HTTPError as compat_HTTPError From 46fde7caeeab13a6277aab22a0e8a29e10c30cc3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 7 Jun 2023 14:51:50 +0100 Subject: [PATCH 10/47] [core] Update redirect handling from yt-dlp * Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094 * add test that redirected `POST` loses its `Content-Type` --- test/test_http.py | 489 +++++++++++++++++++++++++++++++++++++++----- youtube_dl/utils.py | 74 ++++--- 2 files changed, 484 insertions(+), 79 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index 487a9bc77..1a65df9e0 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,33 +8,160 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import gzip +import io +import ssl +import tempfile +import threading +import zlib + +# avoid deprecated alias assertRaisesRegexp +if hasattr(unittest.TestCase, 'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex + +try: + import brotli +except ImportError: + brotli = None +try: + from urllib.request import pathname2url +except ImportError: + from urllib import pathname2url + +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_str as str, + compat_urllib_error, + compat_urllib_HTTPError, + compat_urllib_parse, + compat_urllib_request, +) + +from youtube_dl.utils import ( + sanitized_Request, + urlencode_postdata, +) + from test.helper import ( + FakeYDL, FakeLogger, http_server_port, ) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server, compat_urllib_request -import ssl -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + + # work-around old/new -style class inheritance + def super(self, meth_name, *args, **kwargs): + from types import MethodType + try: + super() + fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) + except TypeError: + fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) + self.super = MethodType(fn, self) + return self.super(meth_name, *args, **kwargs) + def log_message(self, format, *args): pass + def _headers(self): + payload = str(self.headers).encode('utf-8') + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = '{0} NOT FOUND'.format(status).encode('utf-8') + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): + return '{0}://{1}:{2}/{3}'.format( + scheme, host, + port if port is not None + else http_server_port(self.server), path) + + def do_POST(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + def do_GET(self): + + def respond(payload=b'