diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a609f3704..93562afd7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,81 +1,476 @@ name: CI -on: [push, pull_request] + +env: + all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 + main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11 + pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 + cpython-versions: main + test-set: core + # Python beta version to be built using pyenv before setup-python support + # Must also be included in all-cpython-versions + next: 3.13 + +on: + push: + # push inputs aren't known to GitHub + inputs: + cpython-versions: + type: string + default: all + test-set: + type: string + default: core + pull_request: + # pull_request inputs aren't known to GitHub + inputs: + cpython-versions: + type: string + default: main + test-set: + type: string + default: both + workflow_dispatch: + inputs: + cpython-versions: + type: choice + description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11) + options: + - all + - main + required: true + default: main + test-set: + type: choice + description: core, download + options: + - both + - core + - download + required: true + default: both + +permissions: + contents: read + jobs: + select: + name: Select tests from inputs + runs-on: ubuntu-latest + outputs: + cpython-versions: ${{ steps.run.outputs.cpython-versions }} + test-set: ${{ steps.run.outputs.test-set }} + own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} + steps: + # push and pull_request inputs aren't known to GitHub (pt3), so publish per-event defaults through GITHUB_ENV under the names the 'Make version array' step reads (env.cpython-versions, env.test-set) + - name: Set push defaults + if: ${{ github.event_name == 'push' }} + env: + cpython-versions: all + test-set: core + run: | + echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV" + echo "test-set=${{env.test-set}}" >> "$GITHUB_ENV" + - name: Get pull_request inputs + if: ${{ github.event_name == 'pull_request' }} + env: + cpython-versions: main + test-set: both + run: | + echo 
"cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV" + echo "test-set=${{env.test-set}}" >> "$GITHUB_ENV" + - name: Make version array + id: run + run: | + # Make a JSON Array from comma/space-separated string (no extra escaping) + json_list() { \ + ret=""; IFS="${IFS},"; set -- $*; \ + for a in "$@"; do \ + ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \ + done; \ + printf '[%s]' "$ret"; } + tests="${{ inputs.test-set || env.test-set }}" + [ "$tests" = both ] && tests="core download" + printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT" + versions="${{ inputs.cpython-versions || env.cpython-versions }}" + if [ "$versions" = all ]; then \ + versions="${{ env.all-cpython-versions }}"; else \ + versions="${{ env.main-cpython-versions }}"; \ + fi + printf 'cpython-versions=%s\n' \ + "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT" + # versions with a special get-pip.py in a per-version subdirectory + printf 'own-pip-versions=%s\n' \ + "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" + tests: - name: Tests + name: Run tests + needs: select + permissions: + contents: read + packages: write runs-on: ${{ matrix.os }} + env: + PIP: python -m pip + PIP_DISABLE_PIP_VERSION_CHECK: true + PIP_NO_PYTHON_VERSION_WARNING: true strategy: fail-fast: true matrix: - os: [ubuntu-18.04] - # TODO: python 2.6 - python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] + os: [ubuntu-20.04] + python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }} python-impl: [cpython] - ytdl-test-set: [core, download] + ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }} run-tests-ext: [sh] include: - # python 3.2 is only available on windows via setup-python - os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: bat 
- os: windows-2019 - python-version: 3.2 + python-version: 3.4 python-impl: cpython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: bat # jython - - os: ubuntu-18.04 + - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython - ytdl-test-set: core + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }} run-tests-ext: sh - - os: ubuntu-18.04 + - os: ubuntu-20.04 + python-version: 2.7 python-impl: jython - ytdl-test-set: download + ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} run-tests-ext: sh steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - if: ${{ matrix.python-impl == 'cpython' }} + - name: Prepare Linux + if: ${{ startswith(matrix.os, 'ubuntu') }} + shell: bash + run: | + # apt in runner, if needed, may not be up-to-date + sudo apt-get update + - name: Checkout + uses: actions/checkout@v3 + #-------- Python 3 ----- + - name: Set up supported Python ${{ matrix.python-version }} + id: setup-python + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }} + # wrap broken actions/setup-python@v4 + # NB may run apt-get install in Linux + uses: ytdl-org/setup-python@v1 with: python-version: ${{ matrix.python-version }} + cache-build: true + allow-build: info + - name: Locate supported Python ${{ matrix.python-version }} + if: ${{ env.pythonLocation }} + shell: bash + run: | + echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV" + export expected="${{ steps.setup-python.outputs.python-path }}" + dirname() { printf '%s\n' \ + 'import os, sys' \ + 'print(os.path.dirname(sys.argv[1]))' \ + | ${expected} - "$1"; } + expd="$(dirname "$expected")" + export python="$(command -v python)" + [ "$expd" = "$(dirname 
"$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV" + [ -x "$python" ] || printf '%s\n' \ + 'import os' \ + 'exp = os.environ["expected"]' \ + 'python = os.environ["python"]' \ + 'exps = list(os.path.split(exp))' \ + 'if python and (os.path.dirname(python) == exps[0]):' \ + ' exit(0)' \ + 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \ + 'python = os.path.join(*exps)' \ + 'try:' \ + ' os.symlink(exp, python)' \ + 'except AttributeError:' \ + ' os.rename(exp, python)' \ + | ${expected} - + printf '%s\n' \ + 'import sys' \ + 'print(sys.path)' \ + | ${expected} - + #-------- Python next (was 3.12) - + - name: Set up CPython 3.next environment + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} + shell: bash + run: | + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + - name: Cache Python 3.next + id: cachenext + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} + uses: actions/cache@v3 + with: + key: python-${{ env.next }} + path: | + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 3.next + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }} + # dl and build locally + shell: bash + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download PyEnv from its GitHub repository. 
+ export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" + pyenv install ${{ env.next }} + - name: Locate Python 3.next + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }} + shell: bash + run: | + PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)" + test -n "$PYTHONHOME" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + #-------- Python 2.7 -- + - name: Set up Python 2.7 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }} + # install 2.7 + shell: bash + run: | + sudo apt-get install -y python2 python-is-python2 + echo "PYTHONHOME=/usr" >> "$GITHUB_ENV" + #-------- Python 2.6 -- + - name: Set up Python 2.6 environment + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} + shell: bash + run: | + openssl_name=openssl-1.0.2u + echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV" + openssl_dir=$HOME/.local/opt/$openssl_name + echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV" + PYENV_ROOT=$HOME/.local/share/pyenv + echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" + sudo apt-get install -y openssl ca-certificates + - name: Cache Python 2.6 + id: cache26 + if: ${{ matrix.python-version == '2.6' }} + uses: actions/cache@v3 + with: + key: python-2.6.9 + path: | + ${{ env.openssl_dir }} + ${{ env.PYENV_ROOT }} + - name: Build and set up Python 2.6 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! 
steps.cache26.outputs.cache-hit }} + # dl and build locally + shell: bash + run: | + # Install build environment + sudo apt-get install -y build-essential llvm libssl-dev tk-dev \ + libncursesw5-dev libreadline-dev libsqlite3-dev \ + libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev + # Download and install OpenSSL 1.0.2, back in time + openssl_name=${{ env.openssl_name }} + openssl_targz=${openssl_name}.tar.gz + openssl_dir=${{ env.openssl_dir }} + openssl_inc=$openssl_dir/include + openssl_lib=$openssl_dir/lib + openssl_ssl=$openssl_dir/ssl + curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz + tar -xf $openssl_targz + ( cd $openssl_name; \ + ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \ + --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \ + make && \ + make install ) + rm -rf $openssl_name + rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs + # Download PyEnv from its GitHub repository. + export PYENV_ROOT=${{ env.PYENV_ROOT }} + export PATH=$PYENV_ROOT/bin:$PATH + git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" + # Prevent pyenv build trying (and failing) to update pip + export GET_PIP=get-pip-2.6.py + echo 'import sys; sys.exit(0)' > ${GET_PIP} + GET_PIP=$(realpath $GET_PIP) + # Build and install Python + export CFLAGS="-I$openssl_inc" + export LDFLAGS="-L$openssl_lib" + export LD_LIBRARY_PATH="$openssl_lib" + pyenv install 2.6.9 + - name: Locate Python 2.6 + if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }} + shell: bash + run: | + PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9" + echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" + echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV" + #-------- Jython ------ - name: Set up Java 8 if: ${{ matrix.python-impl == 'jython' }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 
with: java-version: 8 + distribution: 'zulu' + - name: Setup Jython environment + if: ${{ matrix.python-impl == 'jython' }} + shell: bash + run: | + echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV" + echo "PIP=pip" >> "$GITHUB_ENV" + - name: Cache Jython + id: cachejy + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }} + uses: actions/cache@v3 + with: + # 2.7.3 now available, may solve SNI issue + key: jython-2.7.1 + path: | + ${{ env.JYTHON_ROOT }} - name: Install Jython - if: ${{ matrix.python-impl == 'jython' }} + if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }} + shell: bash run: | - wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar - java -jar jython-installer.jar -s -d "$HOME/jython" - echo "$HOME/jython/bin" >> $GITHUB_PATH - - name: Install nose - if: ${{ matrix.python-impl != 'jython' }} - run: pip install nose - - name: Install nose (Jython) - if: ${{ matrix.python-impl == 'jython' }} - # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar + java -jar jython-installer.jar -s -d "${JYTHON_ROOT}" + echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH" + - name: Set up cached Jython + if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash run: | - wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl - pip install nose-1.3.7-py2-none-any.whl + JYTHON_ROOT="${{ env.JYTHON_ROOT }}" + echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH + - name: Install supporting Python 2.7 if possible + if: ${{ steps.cachejy.outputs.cache-hit }} + shell: bash + run: | + sudo apt-get install -y python2.7 || true + 
#-------- pip --------- + - name: Set up supported Python ${{ matrix.python-version }} pip + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }} + # This step may run in either Linux or Windows + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # curl is available on both Windows and Linux, -L follows redirects, -O gets name + python -m ensurepip || python -m pip --version || { \ + get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \ + curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \ + python get-pip.py; } + - name: Set up Python 2.6 pip + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; } + # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751 + echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV" + - name: Set up other Python ${{ matrix.python-version }} pip + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + python -m pip --version || { \ + curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \ + curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \ + python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; } + #-------- unittest ---- + - name: Upgrade Unittest for Python 2.6 + if: ${{ matrix.python-version == '2.6' }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see 
https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show unittest2 || { \ + for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \ + "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \ + "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \ + "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \ + "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \ + curl -L -O "https://files.pythonhosted.org/packages/${u}"; \ + $PIP install ${u##*/}; \ + done; } + # make tests use unittest2 + for test in ./test/test_*.py ./test/helper.py; do + sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test" + done + #-------- nose -------- + - name: Install nose for Python ${{ matrix.python-version }} + if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }} + shell: bash + run: | + echo "$PATH" + echo "$PYTHONHOME" + # Use PyNose for recent Pythons instead of Nose + py3ver="${{ matrix.python-version }}" + py3ver=${py3ver#3.} + [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0 + [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose + $PIP -qq show $nose || $PIP install $nose + - name: Install nose for other Python 2 + if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }} + shell: bash + run: | + # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb) + $PIP -qq show nose || { \ + curl -L -O 
"https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \ + $PIP install nose-1.3.7-py2-none-any.whl; } + - name: Install nose for other Python 3 + if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }} + shell: bash + run: | + $PIP -qq show nose || { \ + curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \ + $PIP install nose-1.3.7-py3-none-any.whl; } + - name: Set up nosetest test + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} + shell: bash + run: | + # set PYTHON_VER + PYTHON_VER=${{ matrix.python-version }} + [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}" + echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV" + echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV" + # define a test to validate the Python version used by nosetests + printf '%s\n' \ + 'from __future__ import unicode_literals' \ + 'import sys, os, platform' \ + 'try:' \ + ' import unittest2 as unittest' \ + 'except ImportError:' \ + ' import unittest' \ + 'class TestPython(unittest.TestCase):' \ + ' def setUp(self):' \ + ' self.ver = os.environ["PYTHON_VER"].split("-")' \ + ' def test_python_ver(self):' \ + ' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \ + ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \ + ' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \ + ' def test_python_impl(self):' \ + ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \ + > test/test_python.py + #-------- TESTS ------- - name: Run tests + if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }} continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl 
== 'jython' }} env: YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} + run: | + ./devscripts/run_tests.${{ matrix.run-tests-ext }} flake8: name: Linter runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install flake8 run: pip install flake8 - name: Run flake8 run: flake8 . + diff --git a/README.md b/README.md index cd888c731..47e686f84 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex You can also use pip: sudo -H pip install --upgrade youtube-dl - + This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. macOS users can install youtube-dl with [Homebrew](https://brew.sh/): @@ -563,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f - `is_live` (boolean): Whether this video is a live stream or a fixed-length video - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - - `format` (string): A human-readable description of the format + - `format` (string): A human-readable description of the format - `format_id` (string): Format code specified by `--format` - `format_note` (string): Additional info about the format - `width` (numeric): Width of the video @@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use The current default template is `%(title)s-%(id)s.%(ext)s`. 
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title. #### Output template and Windows batch files @@ -675,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM **tl;dr:** [navigate me to examples](#format-selection-examples). -The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. +The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. @@ -760,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb - Absolute dates: Dates in the format `YYYYMMDD`. 
- Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` - + Examples: ```bash @@ -918,7 +918,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. -In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). +In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. @@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file python test/test_download.py nosetests +For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later. + See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. 
If you want to create a build of youtube-dl yourself, you'll need @@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions! ## youtube-dl coding conventions -This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code. +This section introduces guidelines for writing idiomatic, robust and future-proof extractor code. Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. @@ -1114,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP ```python meta = self._download_json(url, video_id) ``` - + Assume at this point `meta`'s layout is: ```python @@ -1158,7 +1160,7 @@ description = self._search_regex( ``` On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. - + ### Provide fallbacks When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. 
@@ -1206,7 +1208,7 @@ r'(id|ID)=(?P\d+)' #### Make regular expressions relaxed and flexible When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on. - + ##### Example Say you need to extract `title` from the following HTML code: @@ -1230,7 +1232,7 @@ title = self._search_regex( webpage, 'title', group='title') ``` -Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: +Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute: The code definitely should not look like: @@ -1331,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`] Use `url_or_none` for safe URL processing. -Use `try_get` for safe metadata extraction from parsed JSON. +Use `traverse_obj` for safe metadata extraction from parsed JSON. -Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. +Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. 
#### More examples ##### Safely extract optional description from parsed JSON + +When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works: also review usage in the codebase for more examples. + +In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available. + +```python +description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str))) +``` +`T(...)` is a shorthand for a set literal; if you hate people who still run Python 2.6, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`. + +Some extractors use the older and less capable `try_get()` function in the same way. + ```python description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) ``` ##### Safely extract more optional metadata + +In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid. 
+ ```python -video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {} +# formerly: +# video = try_get(response, lambda x: x['result']['video'][0], dict) or {} description = video.get('summary') duration = float_or_none(video.get('durationMs'), scale=1000) view_count = int_or_none(video.get('views')) ``` +#### Safely extract nested lists + +Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`: +```json +{ + "title": "Example video", + "comment": "try extracting this", + "media": [{ + "type": "bad", + "size": 320, + "url": "https://some.cdn.site/bad.mp4" + }, { + "type": "streaming", + "url": "https://some.cdn.site/hls.m3u8" + }, { + "type": "super", + "size": 1280, + "url": "https://some.cdn.site/good.webm" + }], + "moreStuff": "more values", + ... +} +``` + +Then extractor code like this can collect the various fields of the JSON: +```python +... +from ..utils import ( + determine_ext, + int_or_none, + T, + traverse_obj, + txt_or_none, + url_or_none, +) +... + ... 
+ info_dict = {} + # extract title and description if valid and not empty + info_dict.update(traverse_obj(media_json, { + 'title': ('title', T(txt_or_none)), + 'description': ('comment', T(txt_or_none)), + })) + + # extract any recognisable media formats + fmts = [] + # traverse into "media" list, extract `dict`s with desired keys + for fmt in traverse_obj(media_json, ('media', Ellipsis, { + 'format_id': ('type', T(txt_or_none)), + 'url': ('url', T(url_or_none)), + 'width': ('size', T(int_or_none)), })): + # bad `fmt` values were `None` and removed + if 'url' not in fmt: + continue + fmt_url = fmt['url'] # known to be valid URL + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False)) + else: + fmt['ext'] = ext + fmts.append(fmt) + + # sort, raise if no formats + self._sort_formats(fmts) + + info_dict['formats'] = fmts + ... +``` +The extractor raises an exception rather than random crashes if the JSON structure changes so that no formats are found. + # EMBEDDING YOUTUBE-DL youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new). @@ -1408,7 +1497,11 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: # BUGS -Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported in the issue tracker: <https://github.com/ytdl-org/youtube-dl/issues> (<https://yt-dl.org/bug> is an alias for this). Unless you were prompted to or there is another pertinent reason (e.g.
GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). + +## Opening a bug report or suggestion + +Be sure to follow instructions provided **below** and **in the issue tracker**. Complete the appropriate issue template fully. Consider whether your problem is covered by an existing issue: if so, follow the discussion there. Avoid commenting on existing duplicate issues as such comments do not add to the discussion of the issue and are liable to be treated as spam. **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` @@ -1428,17 +1521,17 @@ $ youtube-dl -v The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. -Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): +Finally please review your issue to avoid various common mistakes (you can and should use this as a checklist) listed below. ### Is the description of the issue itself sufficient? -We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. +We often get issue reports that are hard to understand. 
To avoid subsequent clarifications, and to assist participants who are not native English speakers, please elaborate on what feature you are requesting, or what bug you want to be fixed. -So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious +Make sure that it's obvious - What the problem is - How it could be fixed -- How your proposed solution would look like +- How your proposed solution would look If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. @@ -1448,14 +1541,14 @@ If your server has multiple IPs or you suspect censorship, adding `--call-home` **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. +### Is the issue already documented? + +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. Initially, at least, use the search term `-label:duplicate` to focus on active issues. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. + ### Are you using the latest version? Before reporting any issue, type `youtube-dl -U`. 
This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. -### Is the issue already documented? - -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. - ### Why are existing options not enough? Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. 
diff --git a/devscripts/__init__.py b/devscripts/__init__.py new file mode 100644 index 000000000..750dbdca7 --- /dev/null +++ b/devscripts/__init__.py @@ -0,0 +1 @@ +# Empty file needed to make devscripts.utils properly importable from outside diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 3d1391334..7db396a77 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -5,8 +5,12 @@ import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from youtube_dl.compat import compat_open as open + +from utils import read_file BASH_COMPLETION_FILE = "youtube-dl.bash-completion" BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" @@ -18,9 +22,8 @@ def build_completion(opt_parser): for option in group.option_list: # for every long flag opts_flag.append(option.get_opt_string()) - with open(BASH_COMPLETION_TEMPLATE) as f: - template = f.read() - with open(BASH_COMPLETION_FILE, "w") as f: + template = read_file(BASH_COMPLETION_TEMPLATE) + with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f: # just using the special char filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py new file mode 100755 index 000000000..9fb1d2ba8 --- /dev/null +++ b/devscripts/cli_to_api.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +""" +This script displays the API parameters corresponding to a yt-dl command line + +Example: +$ ./cli_to_api.py -f best +{u'format': 'best'} +$ +""" + +# Allow direct execution +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import youtube_dl +from types import MethodType + + +def cli_to_api(*opts): + YDL = youtube_dl.YoutubeDL + + # to extract 
the parsed options, break out of YoutubeDL instantiation + + # return options via this Exception + class ParseYTDLResult(Exception): + def __init__(self, result): + super(ParseYTDLResult, self).__init__('result') + self.opts = result + + # replacement constructor that raises ParseYTDLResult + def ytdl_init(ydl, ydl_opts): + super(YDL, ydl).__init__(ydl_opts) + raise ParseYTDLResult(ydl_opts) + + # patch in the constructor + YDL.__init__ = MethodType(ytdl_init, YDL) + + # core parser + def parsed_options(argv): + try: + youtube_dl._real_main(list(argv)) + except ParseYTDLResult as result: + return result.opts + + # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900 + default = parsed_options([]) + + def neq_opt(a, b): + if a == b: + return False + if a is None and repr(type(b)).endswith(".utils.DateRange'>"): + return '0001-01-01 - 9999-12-31' != '{0}'.format(b) + return a != b + + diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v)) + if 'postprocessors' in diff: + diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']] + return diff + + +def main(): + from pprint import PrettyPrinter + + pprint = PrettyPrinter() + super_format = pprint.format + + def format(object, context, maxlevels, level): + if repr(type(object)).endswith(".utils.DateRange'>"): + return '{0}: {1}>'.format(repr(object)[:-2], object), True, False + return super_format(object, context, maxlevels, level) + + pprint.format = format + + pprint.pprint(cli_to_api(*sys.argv)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 2ddfa1096..320bcfc27 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import json import mimetypes import netrc @@ -10,7 +9,9 @@ import os import re import sys
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_basestring, @@ -22,6 +23,7 @@ from youtube_dl.utils import ( make_HTTPS_handler, sanitized_Request, ) +from utils import read_file class GitHubReleaser(object): @@ -89,8 +91,7 @@ def main(): changelog_file, version, build_path = args - with io.open(changelog_file, encoding='utf-8') as inf: - changelog = inf.read() + changelog = read_file(changelog_file) mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) body = mobj.group(1) if mobj else '' diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 51d19dd33..ef8a39e0b 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -6,10 +6,13 @@ import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl from youtube_dl.utils import shell_quote +from utils import read_file, write_file + FISH_COMPLETION_FILE = 'youtube-dl.fish' FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' @@ -38,11 +41,9 @@ def build_completion(opt_parser): complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) commands.append(shell_quote(complete_cmd)) - with open(FISH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(FISH_COMPLETION_TEMPLATE) filled_template = template.replace('{{commands}}', '\n'.join(commands)) - with open(FISH_COMPLETION_FILE, 'w') as f: - f.write(filled_template) + write_file(FISH_COMPLETION_FILE, filled_template) parser = youtube_dl.parseOpts()[0] diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py index 867ea0048..b84908f85 100755 --- a/devscripts/gh-pages/add-version.py +++ b/devscripts/gh-pages/add-version.py @@ -6,16 +6,21 @@ import sys import hashlib import 
os.path +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from youtube_dl.compat import compat_open as open if len(sys.argv) <= 1: print('Specify the version number as parameter') sys.exit() version = sys.argv[1] -with open('update/LATEST_VERSION', 'w') as f: - f.write(version) +write_file('update/LATEST_VERSION', version) -versions_info = json.load(open('update/versions.json')) +versions_info = json.loads(read_file('update/versions.json')) if 'signature' in versions_info: del versions_info['signature'] @@ -39,5 +44,5 @@ for key, filename in filenames.items(): versions_info['versions'][version] = new_version versions_info['latest'] = version -with open('update/versions.json', 'w') as jsonf: - json.dump(versions_info, jsonf, indent=4, sort_keys=True) +with open('update/versions.json', 'w', encoding='utf-8') as jsonf: + json.dump(versions_info, jsonf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index a873d32ee..3e38e9299 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -2,14 +2,21 @@ from __future__ import unicode_literals import json +import os.path +import sys -versions_info = json.load(open('update/versions.json')) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) + +from utils import read_file, write_file + +versions_info = json.loads(read_file('update/versions.json')) version = versions_info['latest'] version_dict = versions_info['versions'][version] # Read template page -with open('download.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() +template = read_file('download.html.in') template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) @@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@',
version_dict['exe'][0]) template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) template = template.replace('@TAR_URL@', version_dict['tar'][0]) template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) -with open('download.html', 'w', encoding='utf-8') as dlf: - dlf.write(template) + +write_file('download.html', template) diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py index 61487f925..444595c48 100755 --- a/devscripts/gh-pages/update-copyright.py +++ b/devscripts/gh-pages/update-copyright.py @@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals import datetime import glob -import io # For Python 2 compatibility import os import re +import sys -year = str(datetime.datetime.now().year) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) + +from devscripts.utils import read_file, write_file +from youtube_dl import compat_str + +year = compat_str(datetime.datetime.now().year) for fn in glob.glob('*.html*'): - with io.open(fn, encoding='utf-8') as f: - content = f.read() + content = read_file(fn) newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content) if content != newc: tmpFn = fn + '.part' - with io.open(tmpFn, 'wt', encoding='utf-8') as outf: - outf.write(newc) + write_file(tmpFn, newc) os.rename(tmpFn, fn) diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index 506a62377..13a367d34 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -2,10 +2,16 @@ from __future__ import unicode_literals import datetime -import io import json +import os.path import textwrap +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import write_file atom_template = textwrap.dedent("""\ @@ -72,5 +78,4 @@ for v in versions: entries_str = textwrap.indent(''.join(entries), '\t') atom_template =
atom_template.replace('@ENTRIES@', entries_str) -with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file: - atom_file.write(atom_template) +write_file('update/releases.atom', atom_template) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 531c93c70..06a8a474c 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -5,15 +5,17 @@ import sys import os import textwrap +dirn = os.path.dirname + # We must be able to import youtube_dl -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__))))) import youtube_dl +from devscripts.utils import read_file, write_file def main(): - with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: - template = tmplf.read() + template = read_file('supportedsites.html.in') ie_htmls = [] for ie in youtube_dl.list_extractors(age_limit=None): @@ -29,8 +31,7 @@ def main(): template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) - with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: - sitesf.write(template) + write_file('supportedsites.html', template) if __name__ == '__main__': diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 226d1a5d6..5a9eb194f 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -1,10 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse import re +from utils import read_file, write_file + def main(): parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') @@ -14,8 +15,7 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - readme = inf.read() + readme = read_file(infile) bug_text = re.search( r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) @@ -25,8 +25,7 @@ def main(): out = bug_text + dev_text - with 
io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index b7ad23d83..65fa8169f 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -1,8 +1,11 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse +import os.path +import sys + +from utils import read_file, read_version, write_file def main(): @@ -13,17 +16,11 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: - issue_template_tmpl = inf.read() + issue_template_tmpl = read_file(infile) - # Get the version from youtube_dl/version.py without importing the package - exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) + out = issue_template_tmpl % {'version': read_version()} - out = issue_template_tmpl % {'version': locals()['__version__']} - - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 878ae72b1..5b8b123a4 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,28 +1,49 @@ from __future__ import unicode_literals, print_function from inspect import getsource -import io import os from os.path import dirname as dirn +import re import sys print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) lazy_extractors_filename = sys.argv[1] if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) +# Py2: may be confused by leftover lazy_extractors.pyc +if sys.version_info[0] < 3: + for c in ('c', 'o'): + try: + 
os.remove(lazy_extractors_filename + c) + except OSError: + pass + +from devscripts.utils import read_file, write_file +from youtube_dl.compat import compat_register_utf8 + +compat_register_utf8() from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor -with open('devscripts/lazy_load_template.py', 'rt') as f: - module_template = f.read() +module_template = read_file('devscripts/lazy_load_template.py') + + +def get_source(m): + return re.sub(r'(?m)^\s*#.*\n', '', getsource(m)) + module_contents = [ - module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', - 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] + module_template, + get_source(InfoExtractor.suitable), + get_source(InfoExtractor._match_valid_url) + '\n', + 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n', + # needed for suitable() methods of Youtube extractor (see #28780) + 'from youtube_dl.utils import parse_qs, variadic\n', +] ie_template = ''' class {name}({bases}): @@ -54,7 +75,7 @@ def build_lazy_ie(ie, name): valid_url=valid_url, module=ie.__module__) if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: - s += '\n' + getsource(ie.suitable) + s += '\n' + get_source(ie.suitable) if hasattr(ie, '_make_valid_url'): # search extractors s += make_valid_template.format(valid_url=ie._make_valid_url()) @@ -94,7 +115,17 @@ for ie in ordered_cls: module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) -module_src = '\n'.join(module_contents) + '\n' +module_src = '\n'.join(module_contents) -with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: - f.write(module_src) +write_file(lazy_extractors_filename, module_src + '\n') + +# work around JVM byte code module limit in Jython +if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7): + import subprocess + from youtube_dl.compat import compat_subprocess_get_DEVNULL + # if Python 2.7 is available, use it to
compile the module for Jython + try: + # if Python 2.7 is available, use it to compile the module for Jython + subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL()) + except Exception: + pass diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 8fbce0796..7a5b04dcc 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -1,8 +1,14 @@ from __future__ import unicode_literals -import io -import sys +import os.path import re +import sys +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from utils import read_file +from youtube_dl.compat import compat_open as open README_FILE = 'README.md' helptext = sys.stdin.read() @@ -10,8 +16,7 @@ helptext = sys.stdin.read() if isinstance(helptext, bytes): helptext = helptext.decode('utf-8') -with io.open(README_FILE, encoding='utf-8') as f: - oldreadme = f.read() +oldreadme = read_file(README_FILE) header = oldreadme[:oldreadme.index('# OPTIONS')] footer = oldreadme[oldreadme.index('# CONFIGURATION'):] @@ -20,7 +25,7 @@ options = helptext[helptext.index(' General Options:') + 19:] options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) options = '# OPTIONS\n' + options + '\n' -with io.open(README_FILE, 'w', encoding='utf-8') as f: +with open(README_FILE, 'w', encoding='utf-8') as f: f.write(header) f.write(options) f.write(footer) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 764795bc5..c424d18d7 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -1,17 +1,19 @@ #!/usr/bin/env python from __future__ import unicode_literals -import io import optparse -import os +import os.path import sys - # Import youtube_dl -ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') -sys.path.insert(0, ROOT_DIR) +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + import youtube_dl +from utils 
import write_file + def main(): parser = optparse.OptionParser(usage='%prog OUTFILE.md') @@ -38,8 +40,7 @@ def main(): ' - ' + md + '\n' for md in gen_ies_md(ies)) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + write_file(outfile, out) if __name__ == '__main__': diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 76bf873e1..0090ada3e 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -import io import optparse import os.path import re +from utils import read_file, write_file + ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') - PREFIX = r'''%YOUTUBE-DL(1) # NAME @@ -29,8 +29,7 @@ def main(): outfile, = args - with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() + readme = read_file(README_FILE) readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) @@ -38,8 +37,7 @@ def main(): readme = filter_options(readme) - with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(readme) + write_file(outfile, readme) def filter_options(readme): diff --git a/devscripts/utils.py b/devscripts/utils.py new file mode 100644 index 000000000..2d072d2e0 --- /dev/null +++ b/devscripts/utils.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import argparse +import functools +import os.path +import subprocess +import sys + +dirn = os.path.dirname + +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_kwargs, + compat_open as open, +) + + +def read_file(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +def write_file(fname, content, mode='w'): + with open(fname, mode, encoding='utf-8') as f: + return f.write(content) + + +def read_version(fname='youtube_dl/version.py'): + 
"""Get the version without importing the package""" + exec(compile(read_file(fname), fname, 'exec')) + return locals()['__version__'] + + +def get_filename_args(has_infile=False, default_outfile=None): + parser = argparse.ArgumentParser() + if has_infile: + parser.add_argument('infile', help='Input file') + kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {} + kwargs['help'] = 'Output file' + parser.add_argument('outfile', **compat_kwargs(kwargs)) + + opts = parser.parse_args() + if has_infile: + return opts.infile, opts.outfile + return opts.outfile + + +def compose_functions(*functions): + return lambda x: functools.reduce(lambda y, f: f(y), functions, x) + + +def run_process(*args, **kwargs): + kwargs.setdefault('text', True) + kwargs.setdefault('check', True) + kwargs.setdefault('capture_output', True) + if kwargs['text']: + kwargs.setdefault('encoding', 'utf-8') + kwargs.setdefault('errors', 'replace') + kwargs = compat_kwargs(kwargs) + return subprocess.run(args, **kwargs) diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 60aaf76cc..ebd552fcb 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -7,6 +7,8 @@ import sys sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) import youtube_dl +from utils import read_file, write_file + ZSH_COMPLETION_FILE = "youtube-dl.zsh" ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" @@ -34,15 +36,13 @@ def build_completion(opt_parser): flags = [opt.get_opt_string() for opt in opts] - with open(ZSH_COMPLETION_TEMPLATE) as f: - template = f.read() + template = read_file(ZSH_COMPLETION_TEMPLATE) template = template.replace("{{fileopts}}", "|".join(fileopts)) template = template.replace("{{diropts}}", "|".join(diropts)) template = template.replace("{{flags}}", " ".join(flags)) - with open(ZSH_COMPLETION_FILE, "w") as f: - f.write(template) + write_file(ZSH_COMPLETION_FILE, template) parser = youtube_dl.parseOpts()[0] diff --git 
a/test/helper.py b/test/helper.py index c6a2f0667..5b7e3dfe2 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import errno -import io import hashlib import json import os.path @@ -9,14 +8,17 @@ import re import types import ssl import sys +import unittest import youtube_dl.extractor from youtube_dl import YoutubeDL from youtube_dl.compat import ( + compat_open as open, compat_os_name, compat_str, ) from youtube_dl.utils import ( + IDENTITY, preferredencoding, write_string, ) @@ -27,10 +29,10 @@ def get_params(override=None): "parameters.json") LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "local_parameters.json") - with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + with open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): - with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: parameters.update(json.load(pf)) if override: parameters.update(override) @@ -72,7 +74,8 @@ class FakeYDL(YoutubeDL): def to_screen(self, s, skip_eol=None): print(s) - def trouble(self, s, tb=None): + def trouble(self, *args, **kwargs): + s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message') raise Exception(s) def download(self, x): @@ -89,6 +92,17 @@ class FakeYDL(YoutubeDL): self.report_warning = types.MethodType(report_warning, self) +class FakeLogger(object): + def debug(self, msg): + pass + + def warning(self, msg): + pass + + def error(self, msg): + pass + + def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): for tc in ie.get_testcases(include_onlymatching): @@ -128,7 +142,7 @@ def expect_value(self, got, expected, field): self.assertTrue( contains_str in got, 'field %s (value: %r) should contain %r' % (field, got, contains_str)) - elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', 
expected): + elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected): fn = eval(expected) suite = expected.split(':', 1)[1].strip() self.assertTrue( @@ -286,3 +300,7 @@ def http_server_port(httpd): else: sock = httpd.socket return sock.getsockname()[1] + + +def expectedFailureIf(cond): + return unittest.expectedFailure if cond else IDENTITY diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index dd69a681b..d55d6ad54 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -3,19 +3,37 @@ from __future__ import unicode_literals # Allow direct execution -import io import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value, http_server_port -from youtube_dl.compat import compat_etree_fromstring, compat_http_server -from youtube_dl.extractor.common import InfoExtractor -from youtube_dl.extractor import YoutubeIE, get_info_extractor -from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError import threading +from test.helper import ( + expect_dict, + expect_value, + FakeYDL, + http_server_port, +) +from youtube_dl.compat import ( + compat_etree_fromstring, + compat_http_server, + compat_open as open, +) +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import ( + get_info_extractor, + YoutubeIE, +) +from youtube_dl.utils import ( + encode_data_uri, + ExtractorError, + RegexNotFoundError, + strip_jsonp, +) + TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" @@ -35,13 +53,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler) assert False -class TestIE(InfoExtractor): +class DummyIE(InfoExtractor): pass class TestInfoExtractor(unittest.TestCase): def setUp(self): - self.ie = TestIE(FakeYDL()) + self.ie = DummyIE(FakeYDL()) def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) @@ -62,6 +80,7 @@ class TestInfoExtractor(unittest.TestCase): + ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -74,6 +93,7 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) + self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value') def test_html_search_meta(self): ie = self.ie @@ -98,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + def test_search_nextjs_data(self): + html = ''' + + + + + + Test _search_nextjs_data() + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + +''' + search = self.ie._search_nextjs_data(html, 'testID') + self.assertEqual(search['props']['pageProps']['video']['id'], 'testid') + + def test_search_nuxt_data(self): + html = ''' + + + + + Nuxt.js Test Page + + + + +
+

Example heading

+
+

Decoy text

+
+
+ + + + +''' + search = self.ie._search_nuxt_data(html, 'testID') + self.assertEqual(search['track']['id'], 'testid') + def test_search_json_ld_realworld(self): # https://github.com/ytdl-org/youtube-dl/issues/23306 expect_dict( @@ -346,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase): }], }) + # from https://0000.studio/ + # with type attribute but without extension in URL + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://0000.studio', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', + 'ext': 'mp4', + }], + }) + def test_extract_jwplayer_data_realworld(self): # from http://www.suffolk.edu/sjc/ expect_dict( @@ -799,8 +902,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_m3u8_formats( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -890,7 +993,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'tbr': 5997.485, 'width': 1920, 'height': 1080, - }] + }], + {}, ), ( # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', @@ -973,7 +1077,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'tbr': 4400, 'width': 1920, 'height': 1080, - }] + }], + {}, ), ( # https://github.com/ytdl-org/youtube-dl/issues/20346 # Media considered unfragmented even though it contains @@ -1019,18 +1124,185 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 360, 'height': 360, 'fps': 30, - }] + }], + {}, + ), ( + # https://github.com/ytdl-org/youtube-dl/issues/30235 + # 
Bento4 generated test mpd + # mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles + 'url_and_range', + 'http://unknown/manifest.mpd', # mpd_url + 'http://unknown/', # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'fragment_base_url': 'http://unknown/', + 'ext': 'm4a', + 'format_id': 'audio-und-mp4a.40.2', + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 98.808, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'fragment_base_url': 'http://unknown/', + 'ext': 'mp4', + 'format_id': 'video-avc1', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4D401E', + 'tbr': 699.597, + 'width': 768, + 'height': 432 + }], + {}, + ), ( + # https://github.com/ytdl-org/youtube-dl/issues/27575 + # GPAC generated test mpd + # MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles + 'range_only', + 'http://unknown/manifest.mpd', # mpd_url + 'http://unknown/', # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'fragment_base_url': 'http://unknown/audio_dashinit.mp4', + 'ext': 'm4a', + 'format_id': '2', + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 98.096, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'fragment_base_url': 'http://unknown/video_dashinit.mp4', + 'ext': 'mp4', + 'format_id': '1', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4D401E', + 'tbr': 526.987, + 'width': 768, + 'height': 432 + }], + {}, + ), ( + 'subtitles', + 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', + [{ + 'format_id': 'audio=128001', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'm4a', + 'tbr': 128.001, + 'asr': 48000, + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }, { + 'format_id': 'video=100000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 336, + 'height': 144, + 'tbr': 100, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }, { + 'format_id': 'video=326000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 562, + 'height': 240, + 'tbr': 326, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }, { + 'format_id': 'video=698000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 844, + 'height': 360, + 'tbr': 698, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }, { + 'format_id': 'video=1493000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 1126, + 'height': 480, + 'tbr': 1493, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }, { + 'format_id': 'video=4482000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 1688, + 'height': 720, + 'tbr': 4482, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + }], + { + 'en': [ + { + 'ext': 'mp4', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + } + ] + }, ) ] - for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, - mode='r', encoding='utf-8') as f: - formats = self.ie._parse_mpd_formats( + for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: + formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode('utf-8')), mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + expect_value(self, subtitles, expected_subtitles, None) def test_parse_f4m_formats(self): _TEST_CASES = [ @@ -1051,8 +1323,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode('utf-8')), f4m_url, None) @@ -1099,8 +1371,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for xspf_file, 
xspf_url, expected_entries in _TEST_CASES: - with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8c8e619c..d994682b2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,14 +10,31 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy +import json -from test.helper import FakeYDL, assertRegexpMatches +from test.helper import ( + FakeYDL, + assertRegexpMatches, + try_rm, +) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_str, compat_urllib_error +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_cookies_SimpleCookie, + compat_kwargs, + compat_open as open, + compat_str, + compat_urllib_error, +) + from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor.common import InfoExtractor from youtube_dl.postprocessor.common import PostProcessor -from youtube_dl.utils import ExtractorError, match_filter_func +from youtube_dl.utils import ( + ExtractorError, + match_filter_func, + traverse_obj, +) TEST_URL = 'http://localhost/sample.mp4' @@ -29,11 +46,14 @@ class YDL(FakeYDL): self.msgs = [] def process_info(self, info_dict): - self.downloaded_info_dicts.append(info_dict) + self.downloaded_info_dicts.append(info_dict.copy()) def to_screen(self, msg): self.msgs.append(msg) + def dl(self, *args, **kwargs): + assert False, 'Downloader must not be invoked for test_YoutubeDL' + def _make_result(formats, **kwargs): res = { @@ -42,8 +62,9 @@ def _make_result(formats, **kwargs): 'title': 'testttitle', 'extractor': 'testex', 'extractor_key': 'TestEx', + 'webpage_url': 'http://example.com/watch?v=shenanigans', } - 
res.update(**kwargs) + res.update(**compat_kwargs(kwargs)) return res @@ -681,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase): class SimplePP(PostProcessor): def run(self, info): - with open(audiofile, 'wt') as f: + with open(audiofile, 'w') as f: f.write('EXAMPLE') return [info['filepath']], info def run_pp(params, PP): - with open(filename, 'wt') as f: + with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) ydl.add_post_processor(PP()) @@ -705,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase): class ModifierPP(PostProcessor): def run(self, info): - with open(info['filepath'], 'wt') as f: + with open(info['filepath'], 'w') as f: f.write('MODIFIED') return [], info @@ -930,17 +951,11 @@ class TestYoutubeDL(unittest.TestCase): # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): - class _YDL(YDL): - def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) - - def trouble(self, s, tb=None): - pass - - ydl = _YDL({ + ydl = YDL({ 'format': 'extra', 'ignoreerrors': True, }) + ydl.trouble = lambda *_, **__: None class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P\d+)' @@ -1017,5 +1032,160 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(out_info['release_date'], '20210930') +class TestYoutubeDLCookies(unittest.TestCase): + + @staticmethod + def encode_cookie(cookie): + if not isinstance(cookie, dict): + cookie = vars(cookie) + for name, value in cookie.items(): + yield name, compat_str(value) + + @classmethod + def comparable_cookies(cls, cookies): + # Work around cookiejar cookies not being unicode strings + return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies)))) + + def assertSameCookies(self, c1, c2, msg=None): + return self.assertEqual( + *map(self.comparable_cookies, (c1, c2)), + msg=msg) + + def assertSameCookieStrings(self, c1, c2, msg=None): + return self.assertSameCookies( + *map(lambda 
c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)), + msg=msg) + + def test_header_cookies(self): + + ydl = FakeYDL() + ydl.report_warning = lambda *_, **__: None + + def cookie(name, value, version=None, domain='', path='', secure=False, expires=None): + return compat_http_cookiejar_Cookie( + version or 0, name, value, None, False, + domain, bool(domain), bool(domain), path, bool(path), + secure, expires, False, None, None, rest={}) + + test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s')) + + def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None): + def _test(): + ydl.cookiejar.clear() + ydl._load_cookies(encoded_cookies, autoscope=headers) + if headers: + ydl._apply_header_cookies(test_url) + data = {'url': test_url} + ydl._calc_headers(data) + self.assertSameCookies( + cookies, ydl.cookiejar, + 'Extracted cookiejar.Cookie is not the same') + if not headers: + self.assertSameCookieStrings( + data.get('cookies'), round_trip or encoded_cookies, + msg='Cookie is not the same as round trip') + ydl.__dict__['_YoutubeDL__header_cookies'] = [] + + try: + _test() + except AssertionError: + raise + except Exception as e: + if not error_re: + raise + assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2)) + + test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)]) + test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed') + test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [ + cookie('cookie1', 'value1', domain=test_domain, path='/test'), + cookie('cookie2', 'value2', domain=test_domain, path='/')]) + cookie_kw = compat_kwargs( + {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', }) + test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [ + cookie('test', 'value', **cookie_kw)]) + test('test="value; "; path=/test; 
domain=' + test_domain, [ + cookie('test', 'value; ', domain=test_domain, path='/test')], + round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain)) + test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)], + round_trip='name=""; Domain=' + test_domain) + test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True) + test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [], + headers=True, error_re='Invalid syntax') + ydl.report_warning = ydl.report_error + test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk') + + def test_infojson_cookies(self): + TEST_FILE = 'test_infojson_cookies.info.json' + TEST_URL = 'https://example.com/example.mp4' + COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com' + COOKIE_HEADER = {'Cookie': 'a=b; c=d'} + + ydl = FakeYDL() + ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE) + + def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False): + fmt = {'url': TEST_URL} + if fmts_header_cookies: + fmt['http_headers'] = COOKIE_HEADER + if cookies_field: + fmt['cookies'] = COOKIES + return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None) + + def test(initial_info, note): + + def failure_msg(why): + return ' when '.join((why, note)) + + result = {} + result['processed'] = ydl.process_ie_result(initial_info) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=failure_msg('No cookies set in cookiejar after initial process')) + ydl.cookiejar.clear() + with open(TEST_FILE) as infojson: + result['loaded'] = ydl.sanitize_info(json.load(infojson), True) + result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=failure_msg('No cookies set in cookiejar after final process')) + ydl.cookiejar.clear() + for key in ('processed', 
'loaded', 'final'): + info = result[key] + self.assertIsNone( + traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False), + msg=failure_msg('Cookie header not removed in {0} result'.format(key))) + self.assertSameCookieStrings( + traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES, + msg=failure_msg('No cookies field found in {0} result'.format(key))) + + test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field') + test(make_info(info_header_cookies=True), 'info_dict header cokies') + test(make_info(fmts_header_cookies=True), 'format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields') + test(make_info(cookies_field=True), 'cookies format field') + test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only') + + try_rm(TEST_FILE) + + def test_add_headers_cookie(self): + def check_for_cookie_header(result): + return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False) + + ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}}) + ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com + + fmt = {'url': 'https://example.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict') + self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field') + self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar') + + fmt = {'url': 'https://wrong.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + 
self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain') + self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain') + self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain') + + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 05f48bd74..4f9dd71ae 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase): # will be ignored self.assertFalse(cookiejar._cookies) + def test_get_cookie_header(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + header = cookiejar.get_cookie_header('https://www.foobar.foobar') + self.assertIn('HTTPONLY_COOKIE', header) + + def test_get_cookies_for_url(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/') + self.assertEqual(len(cookies), 2) + cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/') + self.assertFalse(cookies) + if __name__ == '__main__': unittest.main() diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 6f5513faa..db98494ab 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -11,6 +11,7 @@ from test.helper import try_rm from youtube_dl import YoutubeDL +from youtube_dl.utils import DownloadError def _download_restricted(url, filename, age): @@ -26,7 +27,10 @@ def _download_restricted(url, filename, age): ydl.add_default_info_extractors() json_filename = os.path.splitext(filename)[0] + '.info.json' try_rm(json_filename) - ydl.download([url]) + try: + ydl.download([url]) + except DownloadError: + 
try_rm(json_filename) res = os.path.exists(json_filename) try_rm(json_filename) return res @@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase): self.assertFalse(_download_restricted(url, filename, age)) def test_youtube(self): - self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10) def test_youporn(self): self._assert_restricted( - 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - '505835.mp4', 2, old_age=25) + 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/', + '16715086.mp4', 2, old_age=25) if __name__ == '__main__': diff --git a/test/test_compat.py b/test/test_compat.py index 0986cff37..b83c8cb41 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -23,6 +23,7 @@ from youtube_dl.compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, + compat_urllib_request, ) @@ -48,10 +49,11 @@ class TestCompat(unittest.TestCase): def test_all_present(self): import youtube_dl.compat - all_names = youtube_dl.compat.__all__ - present_names = set(filter( + all_names = sorted( + youtube_dl.compat.__all__ + youtube_dl.compat.legacy) + present_names = set(map(compat_str, filter( lambda c: '_' in c and not c.startswith('_'), - dir(youtube_dl.compat))) - set(['unicode_literals']) + dir(youtube_dl.compat)))) - set(['unicode_literals']) self.assertEqual(all_names, sorted(present_names)) def test_compat_urllib_parse_unquote(self): @@ -134,6 +136,19 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_casefold('\u03a3'), '\u03c3') self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3') + def test_compat_urllib_request_Request(self): + self.assertEqual( + compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(), + 'PUT') + + class PUTrequest(compat_urllib_request.Request): + def get_method(self): + return 'PUT' + + self.assertEqual( + 
PUTrequest('http://127.0.0.1').get_method(), + 'PUT') + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 9b5a5702f..6cf4577ab 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -20,7 +20,6 @@ from test.helper import ( import hashlib -import io import json import socket import re @@ -32,6 +31,8 @@ from youtube_dl.compat import ( compat_map as map, compat_urllib_error, compat_HTTPError, + compat_open as open, + compat_urllib_error, ) from youtube_dl.utils import ( DownloadError, @@ -172,6 +173,7 @@ def generator(test_case, tname): try_rm(tc_filename) try_rm(tc_filename + '.part') try_rm(os.path.splitext(tc_filename)[0] + '.info.json') + try_rm_tcs_files() try: try_num = 1 @@ -237,7 +239,15 @@ def generator(test_case, tname): # First, check test cases' data against extracted data alone expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) # Now, check downloaded file consistency + # support test-case with volatile ID, signalled by regexp value + if tc.get('info_dict', {}).get('id', '').startswith('re:'): + test_id = tc['info_dict']['id'] + tc['info_dict']['id'] = tc_res_dict['id'] + else: + test_id = None tc_filename = get_tc_filename(tc) + if test_id: + tc['info_dict']['id'] = test_id if not test_case.get('params', {}).get('skip_download', False): self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) self.assertTrue(tc_filename in finished_hook_called) @@ -260,7 +270,7 @@ def generator(test_case, tname): self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) - with io.open(info_json_fn, encoding='utf-8') as infof: + with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py new file mode 100644 index 000000000..029f9b05f --- /dev/null +++ 
b/test/test_downloader_external.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import re +import sys +import subprocess +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import ( + FakeLogger, + FakeYDL, + http_server_port, + try_rm, +) +from youtube_dl import YoutubeDL +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_kwargs, +) +from youtube_dl.utils import ( + encodeFilename, + join_nonempty, +) +from youtube_dl.downloader.external import ( + Aria2cFD, + Aria2pFD, + AxelFD, + CurlFD, + FFmpegFD, + HttpieFD, + WgetFD, +) +import threading + +TEST_SIZE = 10 * 1024 + +TEST_COOKIE = { + 'version': 0, + 'name': 'test', + 'value': 'ytdlp', + 'port': None, + 'port_specified': False, + 'domain': '.example.com', + 'domain_specified': True, + 'domain_initial_dot': False, + 'path': '/', + 'path_specified': True, + 'secure': False, + 'expires': None, + 'discard': False, + 'comment': None, + 'comment_url': None, + 'rest': {}, +} + +TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE) + +TEST_INFO = {'url': 'http://www.example.com/'} + + +def cookiejar_Cookie(**cookie_args): + return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args)) + + +def ifExternalFDAvailable(externalFD): + return unittest.skipUnless(externalFD.available(), + externalFD.get_basename() + ' not found') + + +class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def send_content_range(self, total=None): + range_header = self.headers.get('Range') + start = end = None + if range_header: + mobj = re.match(r'bytes=(\d+)-(\d+)', range_header) + if mobj: + start, end = (int(mobj.group(i)) for i in (1, 2)) + valid_range = start is not None and end is not None + if valid_range: + content_range = 'bytes 
%d-%d' % (start, end) + if total: + content_range += '/%d' % total + self.send_header('Content-Range', content_range) + return (end - start + 1) if valid_range else total + + def serve(self, range=True, content_length=True): + self.send_response(200) + self.send_header('Content-Type', 'video/mp4') + size = TEST_SIZE + if range: + size = self.send_content_range(TEST_SIZE) + if content_length: + self.send_header('Content-Length', size) + self.end_headers() + self.wfile.write(b'#' * size) + + def do_GET(self): + if self.path == '/regular': + self.serve() + elif self.path == '/no-content-length': + self.serve(content_length=False) + elif self.path == '/no-range': + self.serve(range=False) + elif self.path == '/no-range-no-content-length': + self.serve(range=False, content_length=False) + else: + assert False, 'unrecognised server path' + + +@ifExternalFDAvailable(Aria2pFD) +class TestAria2pFD(unittest.TestCase): + def setUp(self): + self.httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def download(self, params, ep): + with subprocess.Popen( + ['aria2c', '--enable-rpc'], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) as process: + if not process.poll(): + filename = 'testfile.mp4' + params['logger'] = FakeLogger() + params['outtmpl'] = filename + ydl = YoutubeDL(params) + try_rm(encodeFilename(filename)) + self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + try_rm(encodeFilename(filename)) + process.kill() + + def download_all(self, params): + for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): + self.download(params, ep) + + def test_regular(self): + self.download_all({'external_downloader': 'aria2p'}) + + 
def test_chunked(self): + self.download_all({ + 'external_downloader': 'aria2p', + 'http_chunk_size': 1000, + }) + + +@ifExternalFDAvailable(HttpieFD) +class TestHttpieFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = HttpieFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', + 'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE]) + + +@ifExternalFDAvailable(AxelFD) +class TestAxelFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = AxelFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '--', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE, + '--max-redirect=0', '--', 'http://www.example.com/']) + + +@ifExternalFDAvailable(WgetFD) +class TestWgetFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = WgetFD(ydl, {}) + self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + + +@ifExternalFDAvailable(CurlFD) +class TestCurlFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = CurlFD(ydl, {}) + self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO)) + # Test cookie header is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + self.assertIn('--cookie', 
downloader._make_cmd('test', TEST_INFO)) + self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO)) + + +@ifExternalFDAvailable(Aria2cFD) +class TestAria2cFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = Aria2cFD(ydl, {}) + downloader._make_cmd('test', TEST_INFO) + self.assertFalse(hasattr(downloader, '_cookies_tempfile')) + + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + cmd = downloader._make_cmd('test', TEST_INFO) + self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd) + + +@ifExternalFDAvailable(FFmpegFD) +class TestFFmpegFD(unittest.TestCase): + _args = [] + + def _test_cmd(self, args): + self._args = args + + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = FFmpegFD(ydl, {}) + downloader._debug_cmd = self._test_cmd + info_dict = TEST_INFO.copy() + info_dict['ext'] = 'mp4' + + downloader._call_downloader('test', info_dict) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-i', 'http://www.example.com/', + '-c', 'copy', '-f', 'mp4', 'file:test']) + + # Test cookies arg is added + ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE)) + downloader._call_downloader('test', info_dict) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n', + '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test']) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 750472281..6af86ae48 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -9,7 +9,11 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import http_server_port, try_rm +from test.helper import ( + FakeLogger, + http_server_port, + try_rm, +) from youtube_dl import YoutubeDL from youtube_dl.compat import 
compat_http_server from youtube_dl.downloader.http import HttpFD @@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False -class FakeLogger(object): - def debug(self, msg): - pass - - def warning(self, msg): - pass - - def error(self, msg): - pass - - class TestHttpFD(unittest.TestCase): def setUp(self): self.httpd = compat_http_server.HTTPServer( @@ -95,7 +88,7 @@ class TestHttpFD(unittest.TestCase): self.assertTrue(downloader.real_download(filename, { 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), })) - self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) try_rm(encodeFilename(filename)) def download_all(self, params): diff --git a/test/test_execution.py b/test/test_execution.py index 32948d93e..9daaafa6c 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,46 +8,54 @@ import unittest import sys import os import subprocess -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, rootDir) -try: - _DEV_NULL = subprocess.DEVNULL -except AttributeError: - _DEV_NULL = open(os.devnull, 'wb') +from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL +from youtube_dl.utils import encodeArgument + +compat_register_utf8() + + +_DEV_NULL = compat_subprocess_get_DEVNULL() class TestExecution(unittest.TestCase): + def setUp(self): + self.module = 'youtube_dl' + if sys.version_info < (2, 7): + self.module += '.__main__' + def test_import(self): subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) 
+ subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_cmdline_umlauts(self): + os.environ['PYTHONIOENCODING'] = 'utf-8' p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) def test_lazy_extractors(self): + lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py') try: - subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL) - subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL) finally: - try: - os.remove('youtube_dl/extractor/lazy_extractors.py') - except (IOError, OSError): - pass + for x in ('', 'c') if sys.version_info[0] < 3 else ('',): + try: + os.remove(lazy_extractors + x) + except OSError: + pass if __name__ == '__main__': diff --git a/test/test_http.py b/test/test_http.py index 3ee0a5dda..485c4c6fc 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,30 +8,163 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import http_server_port -from youtube_dl import YoutubeDL -from youtube_dl.compat import 
compat_http_server, compat_urllib_request +import contextlib +import gzip +import io import ssl +import tempfile import threading +import zlib + +# avoid deprecated alias assertRaisesRegexp +if hasattr(unittest.TestCase, 'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex + +try: + import brotli +except ImportError: + brotli = None +try: + from urllib.request import pathname2url +except ImportError: + from urllib import pathname2url + +from youtube_dl.compat import ( + compat_http_cookiejar_Cookie, + compat_http_server, + compat_str as str, + compat_urllib_error, + compat_urllib_HTTPError, + compat_urllib_parse, + compat_urllib_request, +) + +from youtube_dl.utils import ( + sanitized_Request, + update_Request, + urlencode_postdata, +) + +from test.helper import ( + expectedFailureIf, + FakeYDL, + FakeLogger, + http_server_port, +) +from youtube_dl import YoutubeDL TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + + # work-around old/new -style class inheritance + def super(self, meth_name, *args, **kwargs): + from types import MethodType + try: + super() + fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) + except TypeError: + fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) + self.super = MethodType(fn, self) + return self.super(meth_name, *args, **kwargs) + def log_message(self, format, *args): pass + def _headers(self): + payload = str(self.headers).encode('utf-8') + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, 
method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = '{0} NOT FOUND'.format(status).encode('utf-8') + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): + return '{0}://{1}:{2}/{3}'.format( + scheme, host, + port if port is not None + else http_server_port(self.server), path) + + def do_POST(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + def do_GET(self): + + def respond(payload=b'