diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
index aedcfa6b3..e5405c235 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.md
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.01.16
+ [debug] youtube-dl version 2021.12.17
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
index 5c0dfea4e..33b01ce7f 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.md
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -19,7 +19,7 @@ labels: 'site-support-request'
- [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
index 772147a75..285610cc7 100644
--- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -18,13 +18,13 @@ title: ''
- [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
index 2fcaa3a23..af73525fb 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.md
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -18,7 +18,7 @@ title: ''
- [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.01.16
+ [debug] youtube-dl version 2021.12.17
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
index f1adfce8f..42c878b83 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -19,13 +19,13 @@ labels: 'request'
- [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
- [ ] I've searched the bugtracker for similar feature requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..3ba13e0ce
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a9dc47a71..d3b9ae016 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,74 +1,479 @@
name: CI
-on: [push, pull_request]
+
+env:
+ all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12
+ main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11
+ pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
+ cpython-versions: main
+ test-set: core
+ # Python beta version to be built using pyenv before setup-python support
+ # Must also be included in all-cpython-versions
+ next: 3.13
+
+on:
+ push:
+ # push inputs aren't known to GitHub
+ inputs:
+ cpython-versions:
+ type: string
+ default: all
+ test-set:
+ type: string
+ default: core
+ pull_request:
+ # pull_request inputs aren't known to GitHub
+ inputs:
+ cpython-versions:
+ type: string
+ default: main
+ test-set:
+ type: string
+ default: both
+ workflow_dispatch:
+ inputs:
+ cpython-versions:
+ type: choice
+ description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11)
+ options:
+ - all
+ - main
+ required: true
+ default: main
+ test-set:
+ type: choice
+ description: core, download
+ options:
+ - both
+ - core
+ - download
+ required: true
+ default: both
+
+permissions:
+ contents: read
+
jobs:
+ select:
+ name: Select tests from inputs
+ runs-on: ubuntu-latest
+ outputs:
+ cpython-versions: ${{ steps.run.outputs.cpython-versions }}
+ test-set: ${{ steps.run.outputs.test-set }}
+ own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
+ steps:
+ # push and pull_request inputs aren't known to GitHub (pt3)
+ - name: Set push defaults
+ if: ${{ github.event_name == 'push' }}
+ env:
+ cpython-versions: all
+ test-set: core
+ run: |
+ echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+ echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
+ - name: Get pull_request inputs
+ if: ${{ github.event_name == 'pull_request' }}
+ env:
+ cpython-versions: main
+ test-set: both
+ run: |
+ echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
+ echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
+ - name: Make version array
+ id: run
+ run: |
+ # Make a JSON Array from comma/space-separated string (no extra escaping)
+ json_list() { \
+ ret=""; IFS="${IFS},"; set -- $*; \
+ for a in "$@"; do \
+ ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \
+ done; \
+ printf '[%s]' "$ret"; }
+ tests="${{ inputs.test-set || env.test-set }}"
+ [ $tests = both ] && tests="core download"
+ printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT"
+ versions="${{ inputs.cpython-versions || env.cpython-versions }}"
+ if [ "$versions" = all ]; then \
+ versions="${{ env.all-cpython-versions }}"; else \
+ versions="${{ env.main-cpython-versions }}"; \
+ fi
+ printf 'cpython-versions=%s\n' \
+ "$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT"
+ # versions with a special get-pip.py in a per-version subdirectory
+ printf 'own-pip-versions=%s\n' \
+ "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
+
tests:
- name: Tests
+ name: Run tests
+ needs: select
+ permissions:
+ contents: read
+ packages: write
runs-on: ${{ matrix.os }}
+ env:
+ PIP: python -m pip
+ PIP_DISABLE_PIP_VERSION_CHECK: true
+ PIP_NO_PYTHON_VERSION_WARNING: true
strategy:
fail-fast: true
matrix:
- os: [ubuntu-18.04]
- # TODO: python 2.6
- python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
+ os: [ubuntu-20.04]
+ python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
python-impl: [cpython]
- ytdl-test-set: [core, download]
+ ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
run-tests-ext: [sh]
include:
- # python 3.2 is only available on windows via setup-python
- - os: windows-latest
- python-version: 3.2
+ - os: windows-2019
+ python-version: 3.4
python-impl: cpython
- ytdl-test-set: core
+ ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
run-tests-ext: bat
- - os: windows-latest
- python-version: 3.2
+ - os: windows-2019
+ python-version: 3.4
python-impl: cpython
- ytdl-test-set: download
+ ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
run-tests-ext: bat
# jython
- - os: ubuntu-18.04
+ - os: ubuntu-20.04
+ python-version: 2.7
python-impl: jython
- ytdl-test-set: core
+ ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
run-tests-ext: sh
- - os: ubuntu-18.04
+ - os: ubuntu-20.04
+ python-version: 2.7
python-impl: jython
- ytdl-test-set: download
+ ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
run-tests-ext: sh
steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
- if: ${{ matrix.python-impl == 'cpython' }}
+ - name: Prepare Linux
+ if: ${{ startswith(matrix.os, 'ubuntu') }}
+ shell: bash
+ run: |
+ # apt in runner, if needed, may not be up-to-date
+ sudo apt-get update
+ - name: Checkout
+ uses: actions/checkout@v3
+ #-------- Python 3 -----
+ - name: Set up supported Python ${{ matrix.python-version }}
+ id: setup-python
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }}
+ # wrap broken actions/setup-python@v4
+ # NB may run apt-get install in Linux
+ uses: ytdl-org/setup-python@v1
+ env:
+ # Temporary workaround for Python 3.5 failures - May 2024
+ PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
with:
python-version: ${{ matrix.python-version }}
+ cache-build: true
+ allow-build: info
+ - name: Locate supported Python ${{ matrix.python-version }}
+ if: ${{ env.pythonLocation }}
+ shell: bash
+ run: |
+ echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV"
+ export expected="${{ steps.setup-python.outputs.python-path }}"
+ dirname() { printf '%s\n' \
+ 'import os, sys' \
+ 'print(os.path.dirname(sys.argv[1]))' \
+ | ${expected} - "$1"; }
+ expd="$(dirname "$expected")"
+ export python="$(command -v python)"
+ [ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV"
+ [ -x "$python" ] || printf '%s\n' \
+ 'import os' \
+ 'exp = os.environ["expected"]' \
+ 'python = os.environ["python"]' \
+ 'exps = os.path.split(exp)' \
+ 'if python and (os.path.dirname(python) == exp[0]):' \
+ ' exit(0)' \
+ 'exps[1] = "python" + os.path.splitext(exps[1])[1]' \
+ 'python = os.path.join(*exps)' \
+ 'try:' \
+ ' os.symlink(exp, python)' \
+ 'except AttributeError:' \
+ ' os.rename(exp, python)' \
+ | ${expected} -
+ printf '%s\n' \
+ 'import sys' \
+ 'print(sys.path)' \
+ | ${expected} -
+ #-------- Python next (was 3.12) -
+ - name: Set up CPython 3.next environment
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
+ shell: bash
+ run: |
+ PYENV_ROOT=$HOME/.local/share/pyenv
+ echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+ - name: Cache Python 3.next
+ id: cachenext
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
+ uses: actions/cache@v3
+ with:
+ key: python-${{ env.next }}
+ path: |
+ ${{ env.PYENV_ROOT }}
+ - name: Build and set up Python 3.next
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }}
+ # dl and build locally
+ shell: bash
+ run: |
+ # Install build environment
+ sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
+ libncursesw5-dev libreadline-dev libsqlite3-dev \
+ libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+ # Download PyEnv from its GitHub repository.
+ export PYENV_ROOT=${{ env.PYENV_ROOT }}
+ export PATH=$PYENV_ROOT/bin:$PATH
+ git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
+ pyenv install ${{ env.next }}
+ - name: Locate Python 3.next
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
+ shell: bash
+ run: |
+ PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)"
+ test -n "$PYTHONHOME"
+ echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+ echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
+ #-------- Python 2.7 --
+ - name: Set up Python 2.7
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }}
+ # install 2.7
+ shell: bash
+ run: |
+ sudo apt-get install -y python2 python-is-python2
+ echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
+ #-------- Python 2.6 --
+ - name: Set up Python 2.6 environment
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
+ shell: bash
+ run: |
+ openssl_name=openssl-1.0.2u
+ echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
+ openssl_dir=$HOME/.local/opt/$openssl_name
+ echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV"
+ PYENV_ROOT=$HOME/.local/share/pyenv
+ echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
+ sudo apt-get install -y openssl ca-certificates
+ - name: Cache Python 2.6
+ id: cache26
+ if: ${{ matrix.python-version == '2.6' }}
+ uses: actions/cache@v3
+ with:
+ key: python-2.6.9
+ path: |
+ ${{ env.openssl_dir }}
+ ${{ env.PYENV_ROOT }}
+ - name: Build and set up Python 2.6
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
+ # dl and build locally
+ shell: bash
+ run: |
+ # Install build environment
+ sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
+ libncursesw5-dev libreadline-dev libsqlite3-dev \
+ libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
+ # Download and install OpenSSL 1.0.2, back in time
+ openssl_name=${{ env.openssl_name }}
+ openssl_targz=${openssl_name}.tar.gz
+ openssl_dir=${{ env.openssl_dir }}
+ openssl_inc=$openssl_dir/include
+ openssl_lib=$openssl_dir/lib
+ openssl_ssl=$openssl_dir/ssl
+ curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz
+ tar -xf $openssl_targz
+ ( cd $openssl_name; \
+ ./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \
+ --libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \
+ make && \
+ make install )
+ rm -rf $openssl_name
+ rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
+ # Download PyEnv from its GitHub repository.
+ export PYENV_ROOT=${{ env.PYENV_ROOT }}
+ export PATH=$PYENV_ROOT/bin:$PATH
+ git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
+ # Prevent pyenv build trying (and failing) to update pip
+ export GET_PIP=get-pip-2.6.py
+ echo 'import sys; sys.exit(0)' > ${GET_PIP}
+ GET_PIP=$(realpath $GET_PIP)
+ # Build and install Python
+ export CFLAGS="-I$openssl_inc"
+ export LDFLAGS="-L$openssl_lib"
+ export LD_LIBRARY_PATH="$openssl_lib"
+ pyenv install 2.6.9
+ - name: Locate Python 2.6
+ if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
+ shell: bash
+ run: |
+ PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
+ echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
+ echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
+ echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
+ #-------- Jython ------
- name: Set up Java 8
if: ${{ matrix.python-impl == 'jython' }}
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
java-version: 8
- - name: Install Jython
+ distribution: 'zulu'
+ - name: Setup Jython environment
if: ${{ matrix.python-impl == 'jython' }}
+ shell: bash
run: |
- wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
- java -jar jython-installer.jar -s -d "$HOME/jython"
- echo "$HOME/jython/bin" >> $GITHUB_PATH
- - name: Install nose
- run: pip install nose
+ echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
+ echo "PIP=pip" >> "$GITHUB_ENV"
+ - name: Cache Jython
+ id: cachejy
+ if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }}
+ uses: actions/cache@v3
+ with:
+ # 2.7.3 now available, may solve SNI issue
+ key: jython-2.7.1
+ path: |
+ ${{ env.JYTHON_ROOT }}
+ - name: Install Jython
+ if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }}
+ shell: bash
+ run: |
+ JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+ curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
+ java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
+ echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH"
+ - name: Set up cached Jython
+ if: ${{ steps.cachejy.outputs.cache-hit }}
+ shell: bash
+ run: |
+ JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
+ echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
+ - name: Install supporting Python 2.7 if possible
+ if: ${{ steps.cachejy.outputs.cache-hit }}
+ shell: bash
+ run: |
+ sudo apt-get install -y python2.7 || true
+ #-------- pip ---------
+ - name: Set up supported Python ${{ matrix.python-version }} pip
+ if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
+ # This step may run in either Linux or Windows
+ shell: bash
+ run: |
+ echo "$PATH"
+ echo "$PYTHONHOME"
+ # curl is available on both Windows and Linux, -L follows redirects, -O gets name
+ python -m ensurepip || python -m pip --version || { \
+ get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
+ curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
+ python get-pip.py; }
+ - name: Set up Python 2.6 pip
+ if: ${{ matrix.python-version == '2.6' }}
+ shell: bash
+ run: |
+ python -m pip --version || { \
+ curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \
+ curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \
+ python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; }
+ # work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751
+ echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV"
+ - name: Set up other Python ${{ matrix.python-version }} pip
+ if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+ shell: bash
+ run: |
+ python -m pip --version || { \
+ curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
+ curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
+ python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
+ #-------- unittest ----
+ - name: Upgrade Unittest for Python 2.6
+ if: ${{ matrix.python-version == '2.6' }}
+ shell: bash
+ run: |
+ # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+ $PIP -qq show unittest2 || { \
+ for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
+ "f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
+ "c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \
+ "17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \
+ "72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \
+ curl -L -O "https://files.pythonhosted.org/packages/${u}"; \
+ $PIP install ${u##*/}; \
+ done; }
+ # make tests use unittest2
+ for test in ./test/test_*.py ./test/helper.py; do
+ sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
+ done
+ #-------- nose --------
+ - name: Install nose for Python ${{ matrix.python-version }}
+ if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }}
+ shell: bash
+ run: |
+ echo "$PATH"
+ echo "$PYTHONHOME"
+ # Use PyNose for recent Pythons instead of Nose
+ py3ver="${{ matrix.python-version }}"
+ py3ver=${py3ver#3.}
+ [ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0
+ [ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
+ $PIP -qq show $nose || $PIP install $nose
+ - name: Install nose for other Python 2
+ if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }}
+ shell: bash
+ run: |
+ # Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+ $PIP -qq show nose || { \
+ curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
+ $PIP install nose-1.3.7-py2-none-any.whl; }
+ - name: Install nose for other Python 3
+ if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
+ shell: bash
+ run: |
+ $PIP -qq show nose || { \
+ curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
+ $PIP install nose-1.3.7-py3-none-any.whl; }
+ - name: Set up nosetest test
+ if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
+ shell: bash
+ run: |
+ # set PYTHON_VER
+ PYTHON_VER=${{ matrix.python-version }}
+ [ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}"
+ echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV"
+ echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV"
+ # define a test to validate the Python version used by nosetests
+ printf '%s\n' \
+ 'from __future__ import unicode_literals' \
+ 'import sys, os, platform' \
+ 'try:' \
+ ' import unittest2 as unittest' \
+ 'except ImportError:' \
+ ' import unittest' \
+ 'class TestPython(unittest.TestCase):' \
+ ' def setUp(self):' \
+ ' self.ver = os.environ["PYTHON_VER"].split("-")' \
+ ' def test_python_ver(self):' \
+ ' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
+ ' self.assertTrue(sys.version.startswith(self.ver[-1]))' \
+ ' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \
+ ' def test_python_impl(self):' \
+ ' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
+ > test/test_python.py
+ #-------- TESTS -------
- name: Run tests
+ if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
env:
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
- run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
+ run: |
+ ./devscripts/run_tests.${{ matrix.run-tests-ext }}
flake8:
name: Linter
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Set up Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install flake8
run: pip install flake8
- name: Run flake8
run: flake8 .
+
diff --git a/AUTHORS b/AUTHORS
index 64ac71249..ff7a870dd 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -246,4 +246,5 @@ Enes Solak
Nathan Rossi
Thomas van der Berg
Luca Cherubin
-Henrik Heimbuerger
+Adrian Heine
+Henrik Heimbuerger
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 58ab3a4b8..ff40cef78 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -150,7 +150,7 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). This makes the extractor available for use, as long as the class ends with `IE`.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
diff --git a/ChangeLog b/ChangeLog
index 27f01c438..658864282 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,366 @@
+version 2021.12.17
+
+Core
+* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
+
+Extractors
+* [youtube] Update signature function patterns (#30363, #30366)
+* [peertube] Only call description endpoint if necessary (#29383)
+* [periscope] Pass referer to HLS requests (#29419)
+- [liveleak] Remove extractor (#17625, #24222, #29331)
++ [pornhub] Add support for pornhubthbh7ap3u.onion
+* [pornhub] Detect geo restriction
+* [pornhub] Dismiss tbr extracted from download URLs (#28927)
+* [curiositystream:collection] Extend _VALID_URL (#26326, #29117)
+* [youtube] Make get_video_info processing more robust (#29333)
+* [youtube] Workaround for get_video_info request (#29333)
+* [bilibili] Strip uploader name (#29202)
+* [youtube] Update invidious instance list (#29281)
+* [umg:de] Update GraphQL API URL (#29304)
+* [nrk] Switch psapi URL to https (#29344)
++ [egghead] Add support for app.egghead.io (#28404, #29303)
+* [appleconnect] Fix extraction (#29208)
++ [orf:tvthek] Add support for MPD formats (#28672, #29236)
+
+
+version 2021.06.06
+
+Extractors
+* [facebook] Improve login required detection
+* [youporn] Fix formats and view count extraction (#29216)
+* [orf:tvthek] Fix thumbnails extraction (#29217)
+* [formula1] Fix extraction (#29206)
+* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
++ [ustream] Detect https embeds (#29133)
+* [ted] Prefer own formats over external sources (#29142)
+* [twitch:clips] Improve extraction (#29149)
++ [twitch:clips] Add access token query to download URLs (#29136)
+* [youtube] Fix get_video_info request (#29086, #29165)
+* [vimeo] Fix vimeo pro embed extraction (#29126)
+* [redbulltv] Fix embed data extraction (#28770)
+* [shahid] Relax URL regular expression (#28772, #28930)
+
+
+version 2021.05.16
+
+Core
+* [options] Fix thumbnail option group name (#29042)
+* [YoutubeDL] Improve extract_info doc (#28946)
+
+Extractors
++ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
+* [eroprofile] Fix extraction (#23200, #23626, #29008)
++ [vivo] Add support for vivo.st (#29009)
++ [generic] Add support for og:audio (#28311, #29015)
+* [phoenix] Fix extraction (#29057)
++ [generic] Add support for sibnet embeds
++ [vk] Add support for sibnet embeds (#9500)
++ [generic] Add Referer header for direct videojs download URLs (#2879,
+ #20217, #29053)
+* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
+- [blinkx] Remove extractor (#28941)
+* [medaltv] Relax URL regular expression (#28884)
++ [funimation] Add support for optional lang code in URLs (#28950)
++ [gdcvault] Add support for HTML5 videos
+* [dispeak] Improve FLV extraction (#13513, #28970)
+* [kaltura] Improve iframe extraction (#28969)
+* [kaltura] Make embed code alternatives actually work
+* [cda] Improve extraction (#28709, #28937)
+* [twitter] Improve formats extraction from vmap URL (#28909)
+* [xtube] Fix formats extraction (#28870)
+* [svtplay] Improve extraction (#28507, #28876)
+* [tv2dk] Fix extraction (#28888)
+
+
+version 2021.04.26
+
+Extractors
++ [xfileshare] Add support for wolfstream.tv (#28858)
+* [francetvinfo] Improve video id extraction (#28792)
+* [medaltv] Fix extraction (#28807)
+* [tver] Redirect all downloads to Brightcove (#28849)
+* [go] Improve video id extraction (#25207, #25216, #26058)
+* [youtube] Fix lazy extractors (#28780)
++ [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
+* [cbsnews] Fix extraction for python <3.6 (#23359)
+
+
+version 2021.04.17
+
+Core
++ [utils] Add support for experimental HTTP response status code
+ 308 Permanent Redirect (#27877, #28768)
+
+Extractors
++ [lbry] Add support for HLS videos (#27877, #28768)
+* [youtube] Fix stretched ratio calculation
+* [youtube] Improve stretch extraction (#28769)
+* [youtube:tab] Improve grid extraction (#28725)
++ [youtube:tab] Detect series playlist on playlists page (#28723)
++ [youtube] Add more invidious instances (#28706)
+* [pluralsight] Extend anti-throttling timeout (#28712)
+* [youtube] Improve URL to extractor routing (#27572, #28335, #28742)
++ [maoritv] Add support for maoritelevision.com (#24552)
++ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with
+ continuation requests (#28702)
+* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703)
++ [pornhub] Extract DASH and HLS formats from get_media end point (#28698)
+* [cbssports] Fix extraction (#28682)
+* [jamendo] Fix track extraction (#28686)
+* [curiositystream] Fix format extraction (#26845, #28668)
+
+
+version 2021.04.07
+
+Core
+* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies
++ [compat] Introduce compat_cookies_SimpleCookie
+* [extractor/common] Improve JSON-LD author extraction
+* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326,
+ #28640)
+
+Extractors
+* [youtube] Fix extraction of videos with restricted location (#28685)
++ [line] Add support for live.line.me (#17205, #28658)
+* [vimeo] Improve extraction (#28591)
+* [youku] Update ccode (#17852, #28447, #28460, #28648)
+* [youtube] Prefer direct entry metadata over entry metadata from playlist
+ (#28619, #28636)
+* [screencastomatic] Fix extraction (#11976, #24489)
++ [palcomp3] Add support for palcomp3.com (#13120)
++ [arnes] Add support for video.arnes.si (#28483)
++ [youtube:tab] Add support for hashtags (#28308)
+
+
+version 2021.04.01
+
+Extractors
+* [youtube] Setup CONSENT cookie when needed (#28604)
+* [vimeo] Fix password protected review extraction (#27591)
+* [youtube] Improve age-restricted video extraction (#28578)
+
+
+version 2021.03.31
+
+Extractors
+* [vlive] Fix inkey request (#28589)
+* [francetvinfo] Improve video id extraction (#28584)
++ [instagram] Extract duration (#28469)
+* [instagram] Improve title extraction (#28469)
++ [sbs] Add support for ondemand watch URLs (#28566)
+* [youtube] Fix video's channel extraction (#28562)
+* [picarto] Fix live stream extraction (#28532)
+* [vimeo] Fix unlisted video extraction (#28414)
+* [youtube:tab] Fix playlist/community continuation items extraction (#28266)
+* [ard] Improve clip id extraction (#22724, #28528)
+
+
+version 2021.03.25
+
+Extractors
++ [zoom] Add support for zoom.us (#16597, #27002, #28531)
+* [bbc] Fix BBC IPlayer Episodes/Group extraction (#28360)
+* [youtube] Fix default value for youtube_include_dash_manifest (#28523)
+* [zingmp3] Fix extraction (#11589, #16409, #16968, #27205)
++ [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514)
++ [tiktok] Detect private videos (#28453)
+* [vimeo:album] Fix extraction for albums with number of videos multiple
+ to page size (#28486)
+* [vvvvid] Fix kenc format extraction (#28473)
+* [mlb] Fix video extraction (#21241)
+* [svtplay] Improve extraction (#28448)
+* [applepodcasts] Fix extraction (#28445)
+* [rtve] Improve extraction
+ + Extract all formats
+ * Fix RTVE Infantil extraction (#24851)
+ + Extract is_live and series
+
+
+version 2021.03.14
+
+Core
++ Introduce release_timestamp meta field (#28386)
+
+Extractors
++ [southpark] Add support for southparkstudios.com (#28413)
+* [southpark] Fix extraction (#26763, #28413)
+* [sportdeutschland] Fix extraction (#21856, #28425)
+* [pinterest] Reduce the number of HLS format requests
+* [peertube] Improve thumbnail extraction (#28419)
+* [tver] Improve title extraction (#28418)
+* [fujitv] Fix HLS formats extension (#28416)
+* [shahid] Fix format extraction (#28383)
++ [lbry] Add support for channel filters (#28385)
++ [bandcamp] Extract release timestamp
++ [lbry] Extract release timestamp (#28386)
+* [pornhub] Detect flagged videos
++ [pornhub] Extract formats from get_media end point (#28395)
+* [bilibili] Fix video info extraction (#28341)
++ [cbs] Add support for Paramount+ (#28342)
++ [trovo] Add Origin header to VOD formats (#28346)
+* [voxmedia] Fix volume embed extraction (#28338)
+
+
+version 2021.03.03
+
+Extractors
+* [youtube:tab] Switch continuation to browse API (#28289, #28327)
+* [9c9media] Fix extraction for videos with multiple ContentPackages (#28309)
++ [bbc] Add support for BBC Reel videos (#21870, #23660, #28268)
+
+
+version 2021.03.02
+
+Extractors
+* [zdf] Rework extractors (#11606, #13473, #17354, #21185, #26711, #27068,
+ #27930, #28198, #28199, #28274)
+ * Generalize cross-extractor video ids for zdf based extractors
+ * Improve extraction
+ * Fix 3sat and phoenix
+* [stretchinternet] Fix extraction (#28297)
+* [urplay] Fix episode data extraction (#28292)
++ [bandaichannel] Add support for b-ch.com (#21404)
+* [srgssr] Improve extraction (#14717, #14725, #27231, #28238)
+ + Extract subtitle
+ * Fix extraction for new videos
+ * Update srf download domains
+* [vvvvid] Reduce season request payload size
++ [vvvvid] Extract series sublists playlist title (#27601, #27618)
++ [dplay] Extract Ad-Free uplynk URLs (#28160)
++ [wat] Detect DRM protected videos (#27958)
+* [tf1] Improve extraction (#27980, #28040)
+* [tmz] Fix and improve extraction (#24603, #24687, 28211)
++ [gedidigital] Add support for Gedi group sites (#7347, #26946)
+* [youtube] Fix get_video_info request
+
+
+version 2021.02.22
+
+Core
++ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
+ (#28112)
+
+Extractors
+* [apa] Fix and improve extraction (#27750)
++ [youporn] Extract duration (#28019)
++ [peertube] Add support for canard.tube (#28190)
+* [youtube] Fixup m4a_dash formats (#28165)
++ [samplefocus] Add support for samplefocus.com (#27763)
++ [vimeo] Add support for unlisted video source format extraction
+* [viki] Improve extraction (#26522, #28203)
+ * Extract uploader URL and episode number
+ * Report login required error
+ + Extract 480p formats
+ * Fix API v4 calls
+* [ninegag] Unescape title (#28201)
+* [youtube] Improve URL regular expression (#28193)
++ [youtube] Add support for redirect.invidious.io (#28193)
++ [dplay] Add support for de.hgtv.com (#28182)
++ [dplay] Add support for discoveryplus.com (#24698)
++ [simplecast] Add support for simplecast.com (#24107)
+* [youtube] Fix uploader extraction in flat playlist mode (#28045)
+* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
++ [storyfire] Add support for storyfire.com (#25628, #26349)
++ [zhihu] Add support for zhihu.com (#28177)
+* [youtube] Fix controversial videos when authenticated with cookies (#28174)
+* [ccma] Fix timestamp parsing in python 2
++ [videopress] Add support for video.wordpress.com
+* [kakao] Improve info extraction and detect geo restriction (#26577)
+* [xboxclips] Fix extraction (#27151)
+* [ard] Improve formats extraction (#28155)
++ [canvas] Add support for dagelijksekost.een.be (#28119)
+
+
+version 2021.02.10
+
+Extractors
+* [youtube:tab] Improve grid continuation extraction (#28130)
+* [ign] Fix extraction (#24771)
++ [xhamster] Extract format filesize
++ [xhamster] Extract formats from xplayer settings (#28114)
++ [youtube] Add support phone/tablet JS player (#26424)
+* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780,
+ #27109, #27236, #28063)
++ [cda] Detect geo restricted videos (#28106)
+* [urplay] Fix extraction (#28073, #28074)
+* [youtube] Fix release date extraction (#28094)
++ [youtube] Extract abr and vbr (#28100)
+* [youtube] Skip OTF formats (#28070)
+
+
+version 2021.02.04.1
+
+Extractors
+* [youtube] Prefer DASH formats (#28070)
+* [azmedien] Fix extraction (#28064)
+
+
+version 2021.02.04
+
+Extractors
+* [pornhub] Implement lazy playlist extraction
+* [svtplay] Fix video id extraction (#28058)
++ [pornhub] Add support for authentication (#18797, #21416, #24294)
+* [pornhub:user] Improve paging
++ [pornhub:user] Add support for URLs unavailable via /videos page (#27853)
++ [bravotv] Add support for oxygen.com (#13357, #22500)
++ [youtube] Pass embed URL to get_video_info request
+* [ccma] Improve metadata extraction (#27994)
+ + Extract age limit, alt title, categories, series and episode number
+ * Fix timestamp multiple subtitles extraction
+* [egghead] Update API domain (#28038)
+- [vidzi] Remove extractor (#12629)
+* [vidio] Improve metadata extraction
+* [youtube] Improve subtitles extraction
+* [youtube] Fix chapter extraction fallback
+* [youtube] Rewrite extractor
+ * Improve format sorting
+ * Remove unused code
+ * Fix series metadata extraction
+ * Fix trailer video extraction
+ * Improve error reporting
+ + Extract video location
++ [vvvvid] Add support for youtube embeds (#27825)
+* [googledrive] Report download page errors (#28005)
+* [vlive] Fix error message decoding for python 2 (#28004)
+* [youtube] Improve DASH formats file size extraction
+* [cda] Improve birth validation detection (#14022, #27929)
++ [awaan] Extract uploader id (#27963)
++ [medialaan] Add support DPG Media MyChannels based websites (#14871, #15597,
+ #16106, #16489)
+* [abcnews] Fix extraction (#12394, #27920)
+* [AMP] Fix upload date and timestamp extraction (#27970)
+* [tv4] Relax URL regular expression (#27964)
++ [tv2] Add support for mtvuutiset.fi (#27744)
+* [adn] Improve login warning reporting
+* [zype] Fix uplynk id extraction (#27956)
++ [adn] Add support for authentication (#17091, #27841, #27937)
+
+
+version 2021.01.24.1
+
+Core
+* Introduce --output-na-placeholder (#27896)
+
+Extractors
+* [franceculture] Make thumbnail optional (#18807)
+* [franceculture] Fix extraction (#27891, #27903)
+* [njpwworld] Fix extraction (#27890)
+* [comedycentral] Fix extraction (#27905)
+* [wat] Fix format extraction (#27901)
++ [americastestkitchen:season] Add support for seasons (#27861)
++ [trovo] Add support for trovo.live (#26125)
++ [aol] Add support for yahoo videos (#26650)
+* [yahoo] Fix single video extraction
+* [lbry] Unescape lbry URI (#27872)
+* [9gag] Fix and improve extraction (#23022)
+* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860)
+* [aljazeera] Fix extraction (#20911, #27779)
++ [minds] Add support for minds.com (#17934)
+* [ard] Fix title and description extraction (#27761)
++ [spotify] Add support for Spotify Podcasts (#27443)
+
+
version 2021.01.16
Core
diff --git a/README.md b/README.md
index 85fed6d3a..47e686f84 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex
You can also use pip:
sudo -H pip install --upgrade youtube-dl
-
+
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
@@ -52,394 +52,431 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
youtube-dl [OPTIONS] URL [URL...]
# OPTIONS
- -h, --help Print this help text and exit
- --version Print program version and exit
- -U, --update Update this program to latest version. Make
- sure that you have sufficient permissions
- (run with sudo if needed)
- -i, --ignore-errors Continue on download errors, for example to
- skip unavailable videos in a playlist
- --abort-on-error Abort downloading of further videos (in the
- playlist or the command line) if an error
- occurs
- --dump-user-agent Display the current browser identification
- --list-extractors List all supported extractors
- --extractor-descriptions Output descriptions of all supported
- extractors
- --force-generic-extractor Force extraction to use the generic
- extractor
- --default-search PREFIX Use this prefix for unqualified URLs. For
- example "gvsearch2:" downloads two videos
- from google videos for youtube-dl "large
- apple". Use the value "auto" to let
- youtube-dl guess ("auto_warning" to emit a
- warning when guessing). "error" just throws
- an error. The default value "fixup_error"
- repairs broken URLs, but emits an error if
- this is not possible instead of searching.
- --ignore-config Do not read configuration files. When given
- in the global configuration file
- /etc/youtube-dl.conf: Do not read the user
- configuration in ~/.config/youtube-
- dl/config (%APPDATA%/youtube-dl/config.txt
- on Windows)
- --config-location PATH Location of the configuration file; either
- the path to the config or its containing
- directory.
- --flat-playlist Do not extract the videos of a playlist,
- only list them.
- --mark-watched Mark videos watched (YouTube only)
- --no-mark-watched Do not mark videos watched (YouTube only)
- --no-color Do not emit color codes in output
+ -h, --help Print this help text and exit
+ --version Print program version and exit
+ -U, --update Update this program to latest version.
+ Make sure that you have sufficient
+ permissions (run with sudo if needed)
+ -i, --ignore-errors Continue on download errors, for
+ example to skip unavailable videos in a
+ playlist
+ --abort-on-error Abort downloading of further videos (in
+ the playlist or the command line) if an
+ error occurs
+ --dump-user-agent Display the current browser
+ identification
+ --list-extractors List all supported extractors
+ --extractor-descriptions Output descriptions of all supported
+ extractors
+ --force-generic-extractor Force extraction to use the generic
+ extractor
+ --default-search PREFIX Use this prefix for unqualified URLs.
+ For example "gvsearch2:" downloads two
+ videos from google videos for youtube-
+ dl "large apple". Use the value "auto"
+ to let youtube-dl guess ("auto_warning"
+ to emit a warning when guessing).
+ "error" just throws an error. The
+ default value "fixup_error" repairs
+ broken URLs, but emits an error if this
+ is not possible instead of searching.
+ --ignore-config Do not read configuration files. When
+ given in the global configuration file
+ /etc/youtube-dl.conf: Do not read the
+ user configuration in
+ ~/.config/youtube-dl/config
+ (%APPDATA%/youtube-dl/config.txt on
+ Windows)
+ --config-location PATH Location of the configuration file;
+ either the path to the config or its
+ containing directory.
+ --flat-playlist Do not extract the videos of a
+ playlist, only list them.
+ --mark-watched Mark videos watched (YouTube only)
+ --no-mark-watched Do not mark videos watched (YouTube
+ only)
+ --no-color Do not emit color codes in output
## Network Options:
- --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
- To enable SOCKS proxy, specify a proper
- scheme. For example
- socks5://127.0.0.1:1080/. Pass in an empty
- string (--proxy "") for direct connection
- --socket-timeout SECONDS Time to wait before giving up, in seconds
- --source-address IP Client-side IP address to bind to
- -4, --force-ipv4 Make all connections via IPv4
- -6, --force-ipv6 Make all connections via IPv6
+ --proxy URL Use the specified HTTP/HTTPS/SOCKS
+ proxy. To enable SOCKS proxy, specify a
+ proper scheme. For example
+ socks5://127.0.0.1:1080/. Pass in an
+ empty string (--proxy "") for direct
+ connection
+ --socket-timeout SECONDS Time to wait before giving up, in
+ seconds
+ --source-address IP Client-side IP address to bind to
+ -4, --force-ipv4 Make all connections via IPv4
+ -6, --force-ipv6 Make all connections via IPv6
## Geo Restriction:
- --geo-verification-proxy URL Use this proxy to verify the IP address for
- some geo-restricted sites. The default
- proxy specified by --proxy (or none, if the
- option is not present) is used for the
- actual downloading.
- --geo-bypass Bypass geographic restriction via faking
- X-Forwarded-For HTTP header
- --no-geo-bypass Do not bypass geographic restriction via
- faking X-Forwarded-For HTTP header
- --geo-bypass-country CODE Force bypass geographic restriction with
- explicitly provided two-letter ISO 3166-2
- country code
- --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
- explicitly provided IP block in CIDR
- notation
+ --geo-verification-proxy URL Use this proxy to verify the IP address
+ for some geo-restricted sites. The
+ default proxy specified by --proxy (or
+ none, if the option is not present) is
+ used for the actual downloading.
+ --geo-bypass Bypass geographic restriction via
+ faking X-Forwarded-For HTTP header
+ --no-geo-bypass Do not bypass geographic restriction
+ via faking X-Forwarded-For HTTP header
+ --geo-bypass-country CODE Force bypass geographic restriction
+ with explicitly provided two-letter ISO
+ 3166-2 country code
+ --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction
+ with explicitly provided IP block in
+ CIDR notation
## Video Selection:
- --playlist-start NUMBER Playlist video to start at (default is 1)
- --playlist-end NUMBER Playlist video to end at (default is last)
- --playlist-items ITEM_SPEC Playlist video items to download. Specify
- indices of the videos in the playlist
- separated by commas like: "--playlist-items
- 1,2,5,8" if you want to download videos
- indexed 1, 2, 5, 8 in the playlist. You can
- specify range: "--playlist-items
- 1-3,7,10-13", it will download the videos
- at index 1, 2, 3, 7, 10, 11, 12 and 13.
- --match-title REGEX Download only matching titles (regex or
- caseless sub-string)
- --reject-title REGEX Skip download for matching titles (regex or
- caseless sub-string)
- --max-downloads NUMBER Abort after downloading NUMBER files
- --min-filesize SIZE Do not download any videos smaller than
- SIZE (e.g. 50k or 44.6m)
- --max-filesize SIZE Do not download any videos larger than SIZE
- (e.g. 50k or 44.6m)
- --date DATE Download only videos uploaded in this date
- --datebefore DATE Download only videos uploaded on or before
- this date (i.e. inclusive)
- --dateafter DATE Download only videos uploaded on or after
- this date (i.e. inclusive)
- --min-views COUNT Do not download any videos with less than
- COUNT views
- --max-views COUNT Do not download any videos with more than
- COUNT views
- --match-filter FILTER Generic video filter. Specify any key (see
- the "OUTPUT TEMPLATE" for a list of
- available keys) to match if the key is
- present, !key to check if the key is not
- present, key > NUMBER (like "comment_count
- > 12", also works with >=, <, <=, !=, =) to
- compare against a number, key = 'LITERAL'
- (like "uploader = 'Mike Smith'", also works
- with !=) to match against a string literal
- and & to require multiple matches. Values
- which are not known are excluded unless you
- put a question mark (?) after the operator.
- For example, to only match videos that have
- been liked more than 100 times and disliked
- less than 50 times (or the dislike
- functionality is not available at the given
- service), but who also have a description,
- use --match-filter "like_count > 100 &
- dislike_count 50 & description" .
- --no-playlist Download only the video, if the URL refers
- to a video and a playlist.
- --yes-playlist Download the playlist, if the URL refers to
- a video and a playlist.
- --age-limit YEARS Download only videos suitable for the given
- age
- --download-archive FILE Download only videos not listed in the
- archive file. Record the IDs of all
- downloaded videos in it.
- --include-ads Download advertisements as well
- (experimental)
+ --playlist-start NUMBER Playlist video to start at (default is
+ 1)
+ --playlist-end NUMBER Playlist video to end at (default is
+ last)
+ --playlist-items ITEM_SPEC Playlist video items to download.
+ Specify indices of the videos in the
+ playlist separated by commas like: "--
+ playlist-items 1,2,5,8" if you want to
+ download videos indexed 1, 2, 5, 8 in
+ the playlist. You can specify range: "
+ --playlist-items 1-3,7,10-13", it will
+ download the videos at index 1, 2, 3,
+ 7, 10, 11, 12 and 13.
+ --match-title REGEX Download only matching titles (regex or
+ caseless sub-string)
+ --reject-title REGEX Skip download for matching titles
+ (regex or caseless sub-string)
+ --max-downloads NUMBER Abort after downloading NUMBER files
+ --min-filesize SIZE Do not download any videos smaller than
+ SIZE (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than
+ SIZE (e.g. 50k or 44.6m)
+ --date DATE Download only videos uploaded in this
+ date
+ --datebefore DATE Download only videos uploaded on or
+ before this date (i.e. inclusive)
+ --dateafter DATE Download only videos uploaded on or
+ after this date (i.e. inclusive)
+ --min-views COUNT Do not download any videos with less
+ than COUNT views
+ --max-views COUNT Do not download any videos with more
+ than COUNT views
+ --match-filter FILTER Generic video filter. Specify any key
+ (see the "OUTPUT TEMPLATE" for a list
+ of available keys) to match if the key
+ is present, !key to check if the key is
+ not present, key > NUMBER (like
+ "comment_count > 12", also works with
+ >=, <, <=, !=, =) to compare against a
+ number, key = 'LITERAL' (like "uploader
+ = 'Mike Smith'", also works with !=) to
+ match against a string literal and & to
+ require multiple matches. Values which
+ are not known are excluded unless you
+ put a question mark (?) after the
+ operator. For example, to only match
+ videos that have been liked more than
+ 100 times and disliked less than 50
+ times (or the dislike functionality is
+ not available at the given service),
+ but who also have a description, use
+ --match-filter "like_count > 100 &
+ dislike_count 50 & description" .
+ --no-playlist Download only the video, if the URL
+ refers to a video and a playlist.
+ --yes-playlist Download the playlist, if the URL
+ refers to a video and a playlist.
+ --age-limit YEARS Download only videos suitable for the
+ given age
+ --download-archive FILE Download only videos not listed in the
+ archive file. Record the IDs of all
+ downloaded videos in it.
+ --include-ads Download advertisements as well
+ (experimental)
## Download Options:
- -r, --limit-rate RATE Maximum download rate in bytes per second
- (e.g. 50K or 4.2M)
- -R, --retries RETRIES Number of retries (default is 10), or
- "infinite".
- --fragment-retries RETRIES Number of retries for a fragment (default
- is 10), or "infinite" (DASH, hlsnative and
- ISM)
- --skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
- and ISM)
- --abort-on-unavailable-fragment Abort downloading when some fragment is not
- available
- --keep-fragments Keep downloaded fragments on disk after
- downloading is finished; fragments are
- erased by default
- --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
- (default is 1024)
- --no-resize-buffer Do not automatically adjust the buffer
- size. By default, the buffer size is
- automatically resized from an initial value
- of SIZE.
- --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
- downloading (e.g. 10485760 or 10M) (default
- is disabled). May be useful for bypassing
- bandwidth throttling imposed by a webserver
- (experimental)
- --playlist-reverse Download playlist videos in reverse order
- --playlist-random Download playlist videos in random order
- --xattr-set-filesize Set file xattribute ytdl.filesize with
- expected file size
- --hls-prefer-native Use the native HLS downloader instead of
- ffmpeg
- --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
- downloader
- --hls-use-mpegts Use the mpegts container for HLS videos,
- allowing to play the video while
- downloading (some players may not be able
- to play it)
- --external-downloader COMMAND Use the specified external downloader.
- Currently supports
- aria2c,avconv,axel,curl,ffmpeg,httpie,wget
- --external-downloader-args ARGS Give these arguments to the external
- downloader
+ -r, --limit-rate RATE Maximum download rate in bytes per
+ second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES Number of retries (default is 10), or
+ "infinite".
+ --fragment-retries RETRIES Number of retries for a fragment
+ (default is 10), or "infinite" (DASH,
+ hlsnative and ISM)
+ --skip-unavailable-fragments Skip unavailable fragments (DASH,
+ hlsnative and ISM)
+ --abort-on-unavailable-fragment Abort downloading when some fragment is
+ not available
+ --keep-fragments Keep downloaded fragments on disk after
+ downloading is finished; fragments are
+ erased by default
+ --buffer-size SIZE Size of download buffer (e.g. 1024 or
+ 16K) (default is 1024)
+ --no-resize-buffer Do not automatically adjust the buffer
+ size. By default, the buffer size is
+ automatically resized from an initial
+ value of SIZE.
+ --http-chunk-size SIZE Size of a chunk for chunk-based HTTP
+ downloading (e.g. 10485760 or 10M)
+ (default is disabled). May be useful
+ for bypassing bandwidth throttling
+ imposed by a webserver (experimental)
+ --playlist-reverse Download playlist videos in reverse
+ order
+ --playlist-random Download playlist videos in random
+ order
+ --xattr-set-filesize Set file xattribute ytdl.filesize with
+ expected file size
+ --hls-prefer-native Use the native HLS downloader instead
+ of ffmpeg
+ --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
+ downloader
+ --hls-use-mpegts Use the mpegts container for HLS
+ videos, allowing to play the video
+ while downloading (some players may not
+ be able to play it)
+ --external-downloader COMMAND Use the specified external downloader.
+ Currently supports aria2c,avconv,axel,c
+ url,ffmpeg,httpie,wget
+ --external-downloader-args ARGS Give these arguments to the external
+ downloader
## Filesystem Options:
- -a, --batch-file FILE File containing URLs to download ('-' for
- stdin), one URL per line. Lines starting
- with '#', ';' or ']' are considered as
- comments and ignored.
- --id Use only video ID in file name
- -o, --output TEMPLATE Output filename template, see the "OUTPUT
- TEMPLATE" for all the info
- --autonumber-start NUMBER Specify the start value for %(autonumber)s
- (default is 1)
- --restrict-filenames Restrict filenames to only ASCII
- characters, and avoid "&" and spaces in
- filenames
- -w, --no-overwrites Do not overwrite files
- -c, --continue Force resume of partially downloaded files.
- By default, youtube-dl will resume
- downloads if possible.
- --no-continue Do not resume partially downloaded files
- (restart from beginning)
- --no-part Do not use .part files - write directly
- into output file
- --no-mtime Do not use the Last-modified header to set
- the file modification time
- --write-description Write video description to a .description
- file
- --write-info-json Write video metadata to a .info.json file
- --write-annotations Write video annotations to a
- .annotations.xml file
- --load-info-json FILE JSON file containing the video information
- (created with the "--write-info-json"
- option)
- --cookies FILE File to read cookies from and dump cookie
- jar in
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
- permanently. By default
- $XDG_CACHE_HOME/youtube-dl or
- ~/.cache/youtube-dl . At the moment, only
- YouTube player files (for videos with
- obfuscated signatures) are cached, but that
- may change.
- --no-cache-dir Disable filesystem caching
- --rm-cache-dir Delete all filesystem cache files
+ -a, --batch-file FILE File containing URLs to download ('-'
+ for stdin), one URL per line. Lines
+ starting with '#', ';' or ']' are
+ considered as comments and ignored.
+ --id Use only video ID in file name
+ -o, --output TEMPLATE Output filename template, see the
+ "OUTPUT TEMPLATE" for all the info
+ --output-na-placeholder PLACEHOLDER Placeholder value for unavailable meta
+ fields in output filename template
+ (default is "NA")
+ --autonumber-start NUMBER Specify the start value for
+ %(autonumber)s (default is 1)
+ --restrict-filenames Restrict filenames to only ASCII
+ characters, and avoid "&" and spaces in
+ filenames
+ -w, --no-overwrites Do not overwrite files
+ -c, --continue Force resume of partially downloaded
+ files. By default, youtube-dl will
+ resume downloads if possible.
+ --no-continue Do not resume partially downloaded
+ files (restart from beginning)
+ --no-part Do not use .part files - write directly
+ into output file
+ --no-mtime Do not use the Last-modified header to
+ set the file modification time
+ --write-description Write video description to a
+ .description file
+ --write-info-json Write video metadata to a .info.json
+ file
+ --write-annotations Write video annotations to a
+ .annotations.xml file
+ --load-info-json FILE JSON file containing the video
+ information (created with the "--write-
+ info-json" option)
+ --cookies FILE File to read cookies from and dump
+ cookie jar in
+ --cache-dir DIR Location in the filesystem where
+ youtube-dl can store some downloaded
+ information permanently. By default
+ $XDG_CACHE_HOME/youtube-dl or
+ ~/.cache/youtube-dl . At the moment,
+ only YouTube player files (for videos
+ with obfuscated signatures) are cached,
+ but that may change.
+ --no-cache-dir Disable filesystem caching
+ --rm-cache-dir Delete all filesystem cache files
-## Thumbnail images:
- --write-thumbnail Write thumbnail image to disk
- --write-all-thumbnails Write all thumbnail image formats to disk
- --list-thumbnails Simulate and list all available thumbnail
- formats
+## Thumbnail Options:
+ --write-thumbnail Write thumbnail image to disk
+ --write-all-thumbnails Write all thumbnail image formats to
+ disk
+ --list-thumbnails Simulate and list all available
+ thumbnail formats
## Verbosity / Simulation Options:
- -q, --quiet Activate quiet mode
- --no-warnings Ignore warnings
- -s, --simulate Do not download the video and do not write
- anything to disk
- --skip-download Do not download the video
- -g, --get-url Simulate, quiet but print URL
- -e, --get-title Simulate, quiet but print title
- --get-id Simulate, quiet but print id
- --get-thumbnail Simulate, quiet but print thumbnail URL
- --get-description Simulate, quiet but print video description
- --get-duration Simulate, quiet but print video length
- --get-filename Simulate, quiet but print output filename
- --get-format Simulate, quiet but print output format
- -j, --dump-json Simulate, quiet but print JSON information.
- See the "OUTPUT TEMPLATE" for a description
- of available keys.
- -J, --dump-single-json Simulate, quiet but print JSON information
- for each command-line argument. If the URL
- refers to a playlist, dump the whole
- playlist information in a single line.
- --print-json Be quiet and print the video information as
- JSON (video is still being downloaded).
- --newline Output progress bar as new lines
- --no-progress Do not print progress bar
- --console-title Display progress in console titlebar
- -v, --verbose Print various debugging information
- --dump-pages Print downloaded pages encoded using base64
- to debug problems (very verbose)
- --write-pages Write downloaded intermediary pages to
- files in the current directory to debug
- problems
- --print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for debugging
- --no-call-home Do NOT contact the youtube-dl server for
- debugging
+ -q, --quiet Activate quiet mode
+ --no-warnings Ignore warnings
+ -s, --simulate Do not download the video and do not
+ write anything to disk
+ --skip-download Do not download the video
+ -g, --get-url Simulate, quiet but print URL
+ -e, --get-title Simulate, quiet but print title
+ --get-id Simulate, quiet but print id
+ --get-thumbnail Simulate, quiet but print thumbnail URL
+ --get-description Simulate, quiet but print video
+ description
+ --get-duration Simulate, quiet but print video length
+ --get-filename Simulate, quiet but print output
+ filename
+ --get-format Simulate, quiet but print output format
+ -j, --dump-json Simulate, quiet but print JSON
+ information. See the "OUTPUT TEMPLATE"
+ for a description of available keys.
+ -J, --dump-single-json Simulate, quiet but print JSON
+ information for each command-line
+ argument. If the URL refers to a
+ playlist, dump the whole playlist
+ information in a single line.
+ --print-json Be quiet and print the video
+ information as JSON (video is still
+ being downloaded).
+ --newline Output progress bar as new lines
+ --no-progress Do not print progress bar
+ --console-title Display progress in console titlebar
+ -v, --verbose Print various debugging information
+ --dump-pages Print downloaded pages encoded using
+ base64 to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to
+ files in the current directory to debug
+ problems
+ --print-traffic Display sent and read HTTP traffic
+ -C, --call-home Contact the youtube-dl server for
+ debugging
+ --no-call-home Do NOT contact the youtube-dl server
+ for debugging
## Workarounds:
- --encoding ENCODING Force the specified encoding (experimental)
- --no-check-certificate Suppress HTTPS certificate validation
- --prefer-insecure Use an unencrypted connection to retrieve
- information about the video. (Currently
- supported only for YouTube)
- --user-agent UA Specify a custom user agent
- --referer URL Specify a custom referer, use if the video
- access is restricted to one domain
- --add-header FIELD:VALUE Specify a custom HTTP header and its value,
- separated by a colon ':'. You can use this
- option multiple times
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires bidiv
- or fribidi executable in PATH
- --sleep-interval SECONDS Number of seconds to sleep before each
- download when used alone or a lower bound
- of a range for randomized sleep before each
- download (minimum possible number of
- seconds to sleep) when used along with
- --max-sleep-interval.
- --max-sleep-interval SECONDS Upper bound of a range for randomized sleep
- before each download (maximum possible
- number of seconds to sleep). Must only be
- used along with --min-sleep-interval.
+ --encoding ENCODING Force the specified encoding
+ (experimental)
+ --no-check-certificate Suppress HTTPS certificate validation
+ --prefer-insecure Use an unencrypted connection to
+ retrieve information about the video.
+ (Currently supported only for YouTube)
+ --user-agent UA Specify a custom user agent
+ --referer URL Specify a custom referer, use if the
+ video access is restricted to one
+ domain
+ --add-header FIELD:VALUE Specify a custom HTTP header and its
+ value, separated by a colon ':'. You
+ can use this option multiple times
+ --bidi-workaround Work around terminals that lack
+ bidirectional text support. Requires
+ bidiv or fribidi executable in PATH
+ --sleep-interval SECONDS Number of seconds to sleep before each
+ download when used alone or a lower
+ bound of a range for randomized sleep
+ before each download (minimum possible
+ number of seconds to sleep) when used
+ along with --max-sleep-interval.
+ --max-sleep-interval SECONDS Upper bound of a range for randomized
+ sleep before each download (maximum
+ possible number of seconds to sleep).
+ Must only be used along with --min-
+ sleep-interval.
## Video Format Options:
- -f, --format FORMAT Video format code, see the "FORMAT
- SELECTION" for all the info
- --all-formats Download all available video formats
- --prefer-free-formats Prefer free video formats unless a specific
- one is requested
- -F, --list-formats List all available formats of requested
- videos
- --youtube-skip-dash-manifest Do not download the DASH manifests and
- related data on YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g.
- bestvideo+bestaudio), output to given
- container format. One of mkv, mp4, ogg,
- webm, flv. Ignored if no merge is required
+ -f, --format FORMAT Video format code, see the "FORMAT
+ SELECTION" for all the info
+ --all-formats Download all available video formats
+ --prefer-free-formats Prefer free video formats unless a
+ specific one is requested
+ -F, --list-formats List all available formats of requested
+ videos
+ --youtube-skip-dash-manifest Do not download the DASH manifests and
+ related data on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g.
+ bestvideo+bestaudio), output to given
+ container format. One of mkv, mp4, ogg,
+ webm, flv. Ignored if no merge is
+ required
## Subtitle Options:
- --write-sub Write subtitle file
- --write-auto-sub Write automatically generated subtitle file
- (YouTube only)
- --all-subs Download all the available subtitles of the
- video
- --list-subs List all available subtitles for the video
- --sub-format FORMAT Subtitle format, accepts formats
- preference, for example: "srt" or
- "ass/srt/best"
- --sub-lang LANGS Languages of the subtitles to download
- (optional) separated by commas, use --list-
- subs for available language tags
+ --write-sub Write subtitle file
+ --write-auto-sub Write automatically generated subtitle
+ file (YouTube only)
+ --all-subs Download all the available subtitles of
+ the video
+ --list-subs List all available subtitles for the
+ video
+ --sub-format FORMAT Subtitle format, accepts formats
+ preference, for example: "srt" or
+ "ass/srt/best"
+ --sub-lang LANGS Languages of the subtitles to download
+ (optional) separated by commas, use
+ --list-subs for available language tags
## Authentication Options:
- -u, --username USERNAME Login with this account ID
- -p, --password PASSWORD Account password. If this option is left
- out, youtube-dl will ask interactively.
- -2, --twofactor TWOFACTOR Two-factor authentication code
- -n, --netrc Use .netrc authentication data
- --video-password PASSWORD Video password (vimeo, youku)
+ -u, --username USERNAME Login with this account ID
+ -p, --password PASSWORD Account password. If this option is
+ left out, youtube-dl will ask
+ interactively.
+ -2, --twofactor TWOFACTOR Two-factor authentication code
+ -n, --netrc Use .netrc authentication data
+ --video-password PASSWORD Video password (vimeo, youku)
## Adobe Pass Options:
- --ap-mso MSO Adobe Pass multiple-system operator (TV
- provider) identifier, use --ap-list-mso for
- a list of available MSOs
- --ap-username USERNAME Multiple-system operator account login
- --ap-password PASSWORD Multiple-system operator account password.
- If this option is left out, youtube-dl will
- ask interactively.
- --ap-list-mso List all supported multiple-system
- operators
+ --ap-mso MSO Adobe Pass multiple-system operator (TV
+ provider) identifier, use --ap-list-mso
+ for a list of available MSOs
+ --ap-username USERNAME Multiple-system operator account login
+ --ap-password PASSWORD Multiple-system operator account
+ password. If this option is left out,
+ youtube-dl will ask interactively.
+ --ap-list-mso List all supported multiple-system
+ operators
## Post-processing Options:
- -x, --extract-audio Convert video files to audio-only files
- (requires ffmpeg or avconv and ffprobe or
- avprobe)
- --audio-format FORMAT Specify audio format: "best", "aac",
- "flac", "mp3", "m4a", "opus", "vorbis", or
- "wav"; "best" by default; No effect without
- -x
- --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
- a value between 0 (better) and 9 (worse)
- for VBR or a specific bitrate like 128K
- (default 5)
- --recode-video FORMAT Encode the video to another format if
- necessary (currently supported:
- mp4|flv|ogg|webm|mkv|avi)
- --postprocessor-args ARGS Give these arguments to the postprocessor
- -k, --keep-video Keep the video file on disk after the post-
- processing; the video is erased by default
- --no-post-overwrites Do not overwrite post-processed files; the
- post-processed files are overwritten by
- default
- --embed-subs Embed subtitles in the video (only for mp4,
- webm and mkv videos)
- --embed-thumbnail Embed thumbnail in the audio as cover art
- --add-metadata Write metadata to the video file
- --metadata-from-title FORMAT Parse additional metadata like song title /
- artist from the video title. The format
- syntax is the same as --output. Regular
- expression with named capture groups may
- also be used. The parsed parameters replace
- existing values. Example: --metadata-from-
- title "%(artist)s - %(title)s" matches a
- title like "Coldplay - Paradise". Example
- (regex): --metadata-from-title
- "(?P.+?) - (?P.+)"
- --xattrs Write metadata to the video file's xattrs
- (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the
- file. One of never (do nothing), warn (only
- emit a warning), detect_or_warn (the
- default; fix file if we can, warn
- otherwise)
- --prefer-avconv Prefer avconv over ffmpeg for running the
- postprocessors
- --prefer-ffmpeg Prefer ffmpeg over avconv for running the
- postprocessors (default)
- --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
- either the path to the binary or its
- containing directory.
- --exec CMD Execute a command on the file after
- downloading and post-processing, similar to
- find's -exec syntax. Example: --exec 'adb
- push {} /sdcard/Music/ && rm {}'
- --convert-subs FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt|lrc)
+ -x, --extract-audio Convert video files to audio-only files
+ (requires ffmpeg/avconv and
+ ffprobe/avprobe)
+ --audio-format FORMAT Specify audio format: "best", "aac",
+ "flac", "mp3", "m4a", "opus", "vorbis",
+ or "wav"; "best" by default; No effect
+ without -x
+ --audio-quality QUALITY Specify ffmpeg/avconv audio quality,
+ insert a value between 0 (better) and 9
+ (worse) for VBR or a specific bitrate
+ like 128K (default 5)
+ --recode-video FORMAT Encode the video to another format if
+ necessary (currently supported:
+ mp4|flv|ogg|webm|mkv|avi)
+ --postprocessor-args ARGS Give these arguments to the
+ postprocessor
+ -k, --keep-video Keep the video file on disk after the
+ post-processing; the video is erased by
+ default
+ --no-post-overwrites Do not overwrite post-processed files;
+ the post-processed files are
+ overwritten by default
+ --embed-subs Embed subtitles in the video (only for
+ mp4, webm and mkv videos)
+ --embed-thumbnail Embed thumbnail in the audio as cover
+ art
+ --add-metadata Write metadata to the video file
+ --metadata-from-title FORMAT Parse additional metadata like song
+ title / artist from the video title.
+ The format syntax is the same as
+ --output. Regular expression with named
+ capture groups may also be used. The
+ parsed parameters replace existing
+ values. Example: --metadata-from-title
+ "%(artist)s - %(title)s" matches a
+ title like "Coldplay - Paradise".
+ Example (regex): --metadata-from-title
+ "(?P.+?) - (?P.+)"
+ --xattrs Write metadata to the video file's
+ xattrs (using dublin core and xdg
+ standards)
+ --fixup POLICY Automatically correct known faults of
+ the file. One of never (do nothing),
+ warn (only emit a warning),
+ detect_or_warn (the default; fix file
+ if we can, warn otherwise)
+ --prefer-avconv Prefer avconv over ffmpeg for running
+ the postprocessors
+ --prefer-ffmpeg Prefer ffmpeg over avconv for running
+ the postprocessors (default)
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
+ --exec CMD Execute a command on the file after
+ downloading and post-processing,
+ similar to find's -exec syntax.
+ Example: --exec 'adb push {}
+ /sdcard/Music/ && rm {}'
+ --convert-subs FORMAT Convert the subtitles to other format
+ (currently supported: srt|ass|vtt|lrc)
# CONFIGURATION
@@ -526,7 +563,7 @@ The basic usage is not to set any template arguments when downloading a single f
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- - `format` (string): A human-readable description of the format
+ - `format` (string): A human-readable description of the format
- `format_id` (string): Format code specified by `--format`
- `format_note` (string): Additional info about the format
- `width` (numeric): Width of the video
@@ -583,7 +620,7 @@ Available for the media that is a track or a part of a music album:
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- `release_year` (numeric): Year (YYYY) when the album was released
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
@@ -595,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
The current default template is `%(title)s-%(id)s.%(ext)s`.
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
#### Output template and Windows batch files
@@ -638,7 +675,7 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
**tl;dr:** [navigate me to examples](#format-selection-examples).
-The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
@@ -723,7 +760,7 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
- Absolute dates: Dates in the format `YYYYMMDD`.
- Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
-
+
Examples:
```bash
@@ -856,7 +893,7 @@ Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) yout
### The exe throws an error due to missing `MSVCR100.dll`
-To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
+To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
@@ -881,7 +918,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
+In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
@@ -963,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
+For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later.
+
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
@@ -1032,9 +1071,11 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
+ * the test names use the extractor class name **without the trailing `IE`**
+ * tests with `only_matching` key in test's dict are not counted.
+8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py
@@ -1052,7 +1093,7 @@ In any case, thank you very much for your contributions!
## youtube-dl coding conventions
-This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
@@ -1075,7 +1116,7 @@ Say you have some source dictionary `meta` that you've fetched as JSON with HTTP
```python
meta = self._download_json(url, video_id)
```
-
+
Assume at this point `meta`'s layout is:
```python
@@ -1119,7 +1160,7 @@ description = self._search_regex(
```
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
-
+
### Provide fallbacks
When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
@@ -1167,7 +1208,7 @@ r'(id|ID)=(?P\d+)'
#### Make regular expressions relaxed and flexible
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-
+
##### Example
Say you need to extract `title` from the following HTML code:
@@ -1191,7 +1232,7 @@ title = self._search_regex(
webpage, 'title', group='title')
```
-Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
+Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
The code definitely should not look like:
@@ -1292,27 +1333,114 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`]
Use `url_or_none` for safe URL processing.
-Use `try_get` for safe metadata extraction from parsed JSON.
+Use `traverse_obj` for safe metadata extraction from parsed JSON.
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
##### Safely extract optional description from parsed JSON
+
+When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works: also review usage in the codebase for more examples.
+
+In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available.
+
+```python
+description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str)))
+```
+`T(...)` is a shorthand for a set literal; if you hate people who still run Python 2.6, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`.
+
+Some extractors use the older and less capable `try_get()` function in the same way.
+
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```
##### Safely extract more optional metadata
+
+In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid.
+
```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {}
+# formerly:
+# video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
+#### Safely extract nested lists
+
+Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`:
+```json
+{
+ "title": "Example video",
+ "comment": "try extracting this",
+ "media": [{
+ "type": "bad",
+ "size": 320,
+ "url": "https://some.cdn.site/bad.mp4"
+ }, {
+ "type": "streaming",
+ "url": "https://some.cdn.site/hls.m3u8"
+ }, {
+ "type": "super",
+ "size": 1280,
+ "url": "https://some.cdn.site/good.webm"
+ }],
+ "moreStuff": "more values",
+ ...
+}
+```
+
+Then extractor code like this can collect the various fields of the JSON:
+```python
+...
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ T,
+ traverse_obj,
+ txt_or_none,
+ url_or_none,
+)
+...
+ ...
+ info_dict = {}
+ # extract title and description if valid and not empty
+ info_dict.update(traverse_obj(media_json, {
+ 'title': ('title', T(txt_or_none)),
+ 'description': ('comment', T(txt_or_none)),
+ }))
+
+ # extract any recognisable media formats
+ fmts = []
+ # traverse into "media" list, extract `dict`s with desired keys
+ for fmt in traverse_obj(media_json, ('media', Ellipsis, {
+ 'format_id': ('type', T(txt_or_none)),
+ 'url': ('url', T(url_or_none)),
+ 'width': ('size', T(int_or_none)), })):
+ # bad `fmt` values were `None` and removed
+ if 'url' not in fmt:
+ continue
+ fmt_url = fmt['url'] # known to be valid URL
+ ext = determine_ext(fmt_url)
+ if ext == 'm3u8':
+ fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False))
+ else:
+ fmt['ext'] = ext
+ fmts.append(fmt)
+
+ # sort, raise if no formats
+ self._sort_formats(fmts)
+
+ info_dict['formats'] = fmts
+ ...
+```
+The extractor raises an exception rather than random crashes if the JSON structure changes so that no formats are found.
+
# EMBEDDING YOUTUBE-DL
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
@@ -1369,7 +1497,11 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
# BUGS
-Bugs and suggestions should be reported at: . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported in the issue tracker: ( is an alias for this). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+
+## Opening a bug report or suggestion
+
+Be sure to follow instructions provided **below** and **in the issue tracker**. Complete the appropriate issue template fully. Consider whether your problem is covered by an existing issue: if so, follow the discussion there. Avoid commenting on existing duplicate issues as such comments do not add to the discussion of the issue and are liable to be treated as spam.
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
@@ -1389,17 +1521,17 @@ $ youtube-dl -v
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
-Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+Finally please review your issue to avoid various common mistakes (you can and should use this as a checklist) listed below.
### Is the description of the issue itself sufficient?
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+We often get issue reports that are hard to understand. To avoid subsequent clarifications, and to assist participants who are not native English speakers, please elaborate on what feature you are requesting, or what bug you want to be fixed.
-So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+Make sure that it's obvious
- What the problem is
- How it could be fixed
-- How your proposed solution would look like
+- How your proposed solution would look
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
@@ -1409,14 +1541,14 @@ If your server has multiple IPs or you suspect censorship, adding `--call-home`
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. Initially, at least, use the search term `-label:duplicate` to focus on active issues. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
### Are you using the latest version?
Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
-### Is the issue already documented?
-
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
-
### Why are existing options not enough?
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
diff --git a/devscripts/__init__.py b/devscripts/__init__.py
new file mode 100644
index 000000000..750dbdca7
--- /dev/null
+++ b/devscripts/__init__.py
@@ -0,0 +1 @@
+# Empty file needed to make devscripts.utils properly importable from outside
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 3d1391334..7db396a77 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -5,8 +5,12 @@ import os
from os.path import dirname as dirn
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
import youtube_dl
+from youtube_dl.compat import compat_open as open
+
+from utils import read_file
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
@@ -18,9 +22,8 @@ def build_completion(opt_parser):
for option in group.option_list:
# for every long flag
opts_flag.append(option.get_opt_string())
- with open(BASH_COMPLETION_TEMPLATE) as f:
- template = f.read()
- with open(BASH_COMPLETION_FILE, "w") as f:
+ template = read_file(BASH_COMPLETION_TEMPLATE)
+ with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f:
# just using the special char
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
f.write(filled_template)
diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py
new file mode 100755
index 000000000..9fb1d2ba8
--- /dev/null
+++ b/devscripts/cli_to_api.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+"""
+This script displays the API parameters corresponding to a yt-dl command line
+
+Example:
+$ ./cli_to_api.py -f best
+{u'format': 'best'}
+$
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import youtube_dl
+from types import MethodType
+
+
+def cli_to_api(*opts):
+ YDL = youtube_dl.YoutubeDL
+
+ # to extract the parsed options, break out of YoutubeDL instantiation
+
+ # return options via this Exception
+ class ParseYTDLResult(Exception):
+ def __init__(self, result):
+ super(ParseYTDLResult, self).__init__('result')
+ self.opts = result
+
+ # replacement constructor that raises ParseYTDLResult
+ def ytdl_init(ydl, ydl_opts):
+ super(YDL, ydl).__init__(ydl_opts)
+ raise ParseYTDLResult(ydl_opts)
+
+ # patch in the constructor
+ YDL.__init__ = MethodType(ytdl_init, YDL)
+
+ # core parser
+ def parsed_options(argv):
+ try:
+ youtube_dl._real_main(list(argv))
+ except ParseYTDLResult as result:
+ return result.opts
+
+ # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
+ default = parsed_options([])
+
+ def neq_opt(a, b):
+ if a == b:
+ return False
+ if a is None and repr(type(object)).endswith(".utils.DateRange'>"):
+ return '0001-01-01 - 9999-12-31' != '{0}'.format(b)
+ return a != b
+
+ diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v))
+ if 'postprocessors' in diff:
+ diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
+ return diff
+
+
+def main():
+ from pprint import PrettyPrinter
+
+ pprint = PrettyPrinter()
+ super_format = pprint.format
+
+ def format(object, context, maxlevels, level):
+ if repr(type(object)).endswith(".utils.DateRange'>"):
+ return '{0}: {1}>'.format(repr(object)[:-2], object), True, False
+ return super_format(object, context, maxlevels, level)
+
+ pprint.format = format
+
+ pprint.pprint(cli_to_api(*sys.argv))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 2ddfa1096..320bcfc27 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import io
import json
import mimetypes
import netrc
@@ -10,7 +9,9 @@ import os
import re
import sys
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_basestring,
@@ -22,6 +23,7 @@ from youtube_dl.utils import (
make_HTTPS_handler,
sanitized_Request,
)
+from utils import read_file
class GitHubReleaser(object):
@@ -89,8 +91,7 @@ def main():
changelog_file, version, build_path = args
- with io.open(changelog_file, encoding='utf-8') as inf:
- changelog = inf.read()
+ changelog = read_file(changelog_file)
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
body = mobj.group(1) if mobj else ''
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 51d19dd33..ef8a39e0b 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -6,10 +6,13 @@ import os
from os.path import dirname as dirn
import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
import youtube_dl
from youtube_dl.utils import shell_quote
+from utils import read_file, write_file
+
FISH_COMPLETION_FILE = 'youtube-dl.fish'
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
@@ -38,11 +41,9 @@ def build_completion(opt_parser):
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
commands.append(shell_quote(complete_cmd))
- with open(FISH_COMPLETION_TEMPLATE) as f:
- template = f.read()
+ template = read_file(FISH_COMPLETION_TEMPLATE)
filled_template = template.replace('{{commands}}', '\n'.join(commands))
- with open(FISH_COMPLETION_FILE, 'w') as f:
- f.write(filled_template)
+ write_file(FISH_COMPLETION_FILE, filled_template)
parser = youtube_dl.parseOpts()[0]
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py
index 867ea0048..b84908f85 100755
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -6,16 +6,21 @@ import sys
import hashlib
import os.path
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl.compat import compat_open as open
if len(sys.argv) <= 1:
print('Specify the version number as parameter')
sys.exit()
version = sys.argv[1]
-with open('update/LATEST_VERSION', 'w') as f:
- f.write(version)
+write_file('update/LATEST_VERSION', version)
-versions_info = json.load(open('update/versions.json'))
+versions_info = json.loads(read_file('update/versions.json'))
if 'signature' in versions_info:
del versions_info['signature']
@@ -39,5 +44,5 @@ for key, filename in filenames.items():
versions_info['versions'][version] = new_version
versions_info['latest'] = version
-with open('update/versions.json', 'w') as jsonf:
- json.dump(versions_info, jsonf, indent=4, sort_keys=True)
+with open('update/versions.json', 'w', encoding='utf-8') as jsonf:
+ json.dumps(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py
index a873d32ee..3e38e9299 100755
--- a/devscripts/gh-pages/generate-download.py
+++ b/devscripts/gh-pages/generate-download.py
@@ -2,14 +2,21 @@
from __future__ import unicode_literals
import json
+import os.path
+import sys
-versions_info = json.load(open('update/versions.json'))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+from utils import read_file, write_file
+
+versions_info = json.loads(read_file('update/versions.json'))
version = versions_info['latest']
version_dict = versions_info['versions'][version]
# Read template page
-with open('download.html.in', 'r', encoding='utf-8') as tmplf:
- template = tmplf.read()
+template = read_file('download.html.in')
template = template.replace('@PROGRAM_VERSION@', version)
template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
@@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0])
template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
template = template.replace('@TAR_URL@', version_dict['tar'][0])
template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
-with open('download.html', 'w', encoding='utf-8') as dlf:
- dlf.write(template)
+
+write_file('download.html', template)
diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py
index 61487f925..444595c48 100755
--- a/devscripts/gh-pages/update-copyright.py
+++ b/devscripts/gh-pages/update-copyright.py
@@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals
import datetime
import glob
-import io # For Python 2 compatibility
import os
import re
+import sys
-year = str(datetime.datetime.now().year)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
+
+from devscripts.utils import read_file, write_file
+from youtube_dl import compat_str
+
+year = compat_str(datetime.datetime.now().year)
for fn in glob.glob('*.html*'):
- with io.open(fn, encoding='utf-8') as f:
- content = f.read()
+ content = read_file(fn)
newc = re.sub(r'(?PCopyright © 2011-)(?P[0-9]{4})', 'Copyright © 2011-' + year, content)
if content != newc:
tmpFn = fn + '.part'
- with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
- outf.write(newc)
+ write_file(tmpFn, newc)
os.rename(tmpFn, fn)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index 506a62377..13a367d34 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -2,10 +2,16 @@
from __future__ import unicode_literals
import datetime
-import io
import json
+import os.path
import textwrap
+import sys
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import write_file
atom_template = textwrap.dedent("""\
@@ -72,5 +78,4 @@ for v in versions:
entries_str = textwrap.indent(''.join(entries), '\t')
atom_template = atom_template.replace('@ENTRIES@', entries_str)
-with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
- atom_file.write(atom_template)
+write_file('update/releases.atom', atom_template)
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 531c93c70..06a8a474c 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -5,15 +5,17 @@ import sys
import os
import textwrap
+dirn = os.path.dirname
+
# We must be able to import youtube_dl
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
import youtube_dl
+from devscripts.utils import read_file, write_file
def main():
- with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
- template = tmplf.read()
+ template = read_file('supportedsites.html.in')
ie_htmls = []
for ie in youtube_dl.list_extractors(age_limit=None):
@@ -29,8 +31,7 @@ def main():
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
- with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
- sitesf.write(template)
+ write_file('supportedsites.html', template)
if __name__ == '__main__':
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 226d1a5d6..5a9eb194f 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,10 +1,11 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import io
import optparse
import re
+from utils import read_file, write_file
+
def main():
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
@@ -14,8 +15,7 @@ def main():
infile, outfile = args
- with io.open(infile, encoding='utf-8') as inf:
- readme = inf.read()
+ readme = read_file(infile)
bug_text = re.search(
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
@@ -25,8 +25,7 @@ def main():
out = bug_text + dev_text
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(out)
+ write_file(outfile, out)
if __name__ == '__main__':
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
index b7ad23d83..65fa8169f 100644
--- a/devscripts/make_issue_template.py
+++ b/devscripts/make_issue_template.py
@@ -1,8 +1,11 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import io
import optparse
+import os.path
+import sys
+
+from utils import read_file, read_version, write_file
def main():
@@ -13,17 +16,11 @@ def main():
infile, outfile = args
- with io.open(infile, encoding='utf-8') as inf:
- issue_template_tmpl = inf.read()
+ issue_template_tmpl = read_file(infile)
- # Get the version from youtube_dl/version.py without importing the package
- exec(compile(open('youtube_dl/version.py').read(),
- 'youtube_dl/version.py', 'exec'))
+ out = issue_template_tmpl % {'version': read_version()}
- out = issue_template_tmpl % {'version': locals()['__version__']}
-
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(out)
+ write_file(outfile, out)
if __name__ == '__main__':
main()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 878ae72b1..5b8b123a4 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,28 +1,49 @@
from __future__ import unicode_literals, print_function
from inspect import getsource
-import io
import os
from os.path import dirname as dirn
+import re
import sys
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
lazy_extractors_filename = sys.argv[1]
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
+# Py2: may be confused by leftover lazy_extractors.pyc
+if sys.version_info[0] < 3:
+ for c in ('c', 'o'):
+ try:
+ os.remove(lazy_extractors_filename + 'c')
+ except OSError:
+ pass
+
+from devscripts.utils import read_file, write_file
+from youtube_dl.compat import compat_register_utf8
+
+compat_register_utf8()
from youtube_dl.extractor import _ALL_CLASSES
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
-with open('devscripts/lazy_load_template.py', 'rt') as f:
- module_template = f.read()
+module_template = read_file('devscripts/lazy_load_template.py')
+
+
+def get_source(m):
+ return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
+
module_contents = [
- module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
- 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
+ module_template,
+ get_source(InfoExtractor.suitable),
+ get_source(InfoExtractor._match_valid_url) + '\n',
+ 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
+ # needed for suitable() methods of Youtube extractor (see #28780)
+ 'from youtube_dl.utils import parse_qs, variadic\n',
+]
ie_template = '''
class {name}({bases}):
@@ -54,7 +75,7 @@ def build_lazy_ie(ie, name):
valid_url=valid_url,
module=ie.__module__)
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
- s += '\n' + getsource(ie.suitable)
+ s += '\n' + get_source(ie.suitable)
if hasattr(ie, '_make_valid_url'):
# search extractors
s += make_valid_template.format(valid_url=ie._make_valid_url())
@@ -94,7 +115,17 @@ for ie in ordered_cls:
module_contents.append(
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
-module_src = '\n'.join(module_contents) + '\n'
+module_src = '\n'.join(module_contents)
-with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
- f.write(module_src)
+write_file(lazy_extractors_filename, module_src + '\n')
+
+# work around JVM byte code module limit in Jython
+if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
+ import subprocess
+ from youtube_dl.compat import compat_subprocess_get_DEVNULL
+ # if Python 2.7 is available, use it to compile the module for Jython
+ try:
+ # if Python 2.7 is available, use it to compile the module for Jython
+ subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL())
+ except Exception:
+ pass
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 8fbce0796..7a5b04dcc 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,8 +1,14 @@
from __future__ import unicode_literals
-import io
-import sys
+import os.path
import re
+import sys
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from utils import read_file
+from youtube_dl.compat import compat_open as open
README_FILE = 'README.md'
helptext = sys.stdin.read()
@@ -10,8 +16,7 @@ helptext = sys.stdin.read()
if isinstance(helptext, bytes):
helptext = helptext.decode('utf-8')
-with io.open(README_FILE, encoding='utf-8') as f:
- oldreadme = f.read()
+oldreadme = read_file(README_FILE)
header = oldreadme[:oldreadme.index('# OPTIONS')]
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
@@ -20,7 +25,7 @@ options = helptext[helptext.index(' General Options:') + 19:]
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
options = '# OPTIONS\n' + options + '\n'
-with io.open(README_FILE, 'w', encoding='utf-8') as f:
+with open(README_FILE, 'w', encoding='utf-8') as f:
f.write(header)
f.write(options)
f.write(footer)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 764795bc5..c424d18d7 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,17 +1,19 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import io
import optparse
-import os
+import os.path
import sys
-
# Import youtube_dl
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
import youtube_dl
+from utils import write_file
+
def main():
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
@@ -38,8 +40,7 @@ def main():
' - ' + md + '\n'
for md in gen_ies_md(ies))
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(out)
+ write_file(outfile, out)
if __name__ == '__main__':
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 76bf873e1..0090ada3e 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,13 +1,13 @@
from __future__ import unicode_literals
-import io
import optparse
import os.path
import re
+from utils import read_file, write_file
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
-
PREFIX = r'''%YOUTUBE-DL(1)
# NAME
@@ -29,8 +29,7 @@ def main():
outfile, = args
- with io.open(README_FILE, encoding='utf-8') as f:
- readme = f.read()
+ readme = read_file(README_FILE)
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
@@ -38,8 +37,7 @@ def main():
readme = filter_options(readme)
- with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(readme)
+ write_file(outfile, readme)
def filter_options(readme):
diff --git a/devscripts/utils.py b/devscripts/utils.py
new file mode 100644
index 000000000..2d072d2e0
--- /dev/null
+++ b/devscripts/utils.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import argparse
+import functools
+import os.path
+import subprocess
+import sys
+
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
+
+from youtube_dl.compat import (
+ compat_kwargs,
+ compat_open as open,
+)
+
+
+def read_file(fname):
+ with open(fname, encoding='utf-8') as f:
+ return f.read()
+
+
+def write_file(fname, content, mode='w'):
+ with open(fname, mode, encoding='utf-8') as f:
+ return f.write(content)
+
+
+def read_version(fname='youtube_dl/version.py'):
+ """Get the version without importing the package"""
+ exec(compile(read_file(fname), fname, 'exec'))
+ return locals()['__version__']
+
+
+def get_filename_args(has_infile=False, default_outfile=None):
+ parser = argparse.ArgumentParser()
+ if has_infile:
+ parser.add_argument('infile', help='Input file')
+ kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
+ kwargs['help'] = 'Output file'
+ parser.add_argument('outfile', **compat_kwargs(kwargs))
+
+ opts = parser.parse_args()
+ if has_infile:
+ return opts.infile, opts.outfile
+ return opts.outfile
+
+
+def compose_functions(*functions):
+ return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
+
+
+def run_process(*args, **kwargs):
+ kwargs.setdefault('text', True)
+ kwargs.setdefault('check', True)
+ kwargs.setdefault('capture_output', True)
+ if kwargs['text']:
+ kwargs.setdefault('encoding', 'utf-8')
+ kwargs.setdefault('errors', 'replace')
+ kwargs = compat_kwargs(kwargs)
+ return subprocess.run(args, **kwargs)
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 60aaf76cc..ebd552fcb 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -7,6 +7,8 @@ import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
import youtube_dl
+from utils import read_file, write_file
+
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
@@ -34,15 +36,13 @@ def build_completion(opt_parser):
flags = [opt.get_opt_string() for opt in opts]
- with open(ZSH_COMPLETION_TEMPLATE) as f:
- template = f.read()
+ template = read_file(ZSH_COMPLETION_TEMPLATE)
template = template.replace("{{fileopts}}", "|".join(fileopts))
template = template.replace("{{diropts}}", "|".join(diropts))
template = template.replace("{{flags}}", " ".join(flags))
- with open(ZSH_COMPLETION_FILE, "w") as f:
- f.write(template)
+ write_file(ZSH_COMPLETION_FILE, template)
parser = youtube_dl.parseOpts()[0]
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index aa8026a32..ae2a6b8b0 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1,9 +1,9 @@
# Supported sites
- **1tv**: Первый канал
- - **1up.com**
- **20min**
- **220.ro**
- **23video**
+ - **247sports**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@@ -46,10 +46,11 @@
- **Amara**
- **AMCNetworks**
- **AmericasTestKitchen**
+ - **AmericasTestKitchenSeason**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **Anvato**
- - **aol.com**
+ - **aol.com**: Yahoo screen and movies
- **APA**
- **Aparat**
- **AppleConnect**
@@ -82,6 +83,7 @@
- **awaan:video**
- **AZMedien**: AZ Medien videos
- **BaiduVideo**: 百度视频
+ - **bandaichannel**
- **Bandcamp**
- **Bandcamp:album**
- **Bandcamp:weekly**
@@ -89,7 +91,8 @@
- **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer
- **bbc.co.uk:article**: BBC articles
- - **bbc.co.uk:iplayer:playlist**
+ - **bbc.co.uk:iplayer:episodes**
+ - **bbc.co.uk:iplayer:group**
- **bbc.co.uk:playlist**
- **BBVTV**
- **Beatport**
@@ -116,7 +119,6 @@
- **BitChuteChannel**
- **BleacherReport**
- **BleacherReportCMS**
- - **blinkx**
- **Bloomberg**
- **BokeCC**
- **BongaCams**
@@ -158,7 +160,8 @@
- **cbsnews**: CBS News
- **cbsnews:embed**
- **cbsnews:livevideo**: CBS News Live Videos
- - **CBSSports**
+ - **cbssports**
+ - **cbssports:embed**
- **CCMA**
- **CCTV**: 央视网
- **CDA**
@@ -192,8 +195,6 @@
- **CNNArticle**
- **CNNBlogs**
- **ComedyCentral**
- - **ComedyCentralFullEpisodes**
- - **ComedyCentralShortname**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **CONtv**
@@ -214,6 +215,7 @@
- **curiositystream**
- **curiositystream:collection**
- **CWTV**
+ - **DagelijkseKost**: dagelijksekost.een.be
- **DailyMail**
- **dailymotion**
- **dailymotion:playlist**
@@ -235,6 +237,7 @@
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
+ - **DiscoveryPlus**
- **DiscoveryVR**
- **Disney**
- **dlive:stream**
@@ -330,6 +333,7 @@
- **Gaskrank**
- **Gazeta**
- **GDCVault**
+ - **GediDigital**
- **generic**: Generic downloader that works on some sites
- **Gfycat**
- **GiantBomb**
@@ -355,6 +359,7 @@
- **HentaiStigma**
- **hetklokhuis**
- **hgtv.com:show**
+ - **HGTVDe**
- **HiDive**
- **HistoricFilms**
- **history:player**
@@ -377,6 +382,8 @@
- **HungamaSong**
- **Hypem**
- **ign.com**
+ - **IGNArticle**
+ - **IGNVideo**
- **IHeartRadio**
- **iheartradio:podcast**
- **imdb**: Internet Movie Database trailers
@@ -457,14 +464,14 @@
- **limelight**
- **limelight:channel**
- **limelight:channel_list**
+ - **LineLive**
+ - **LineLiveChannel**
- **LineTV**
- **linkedin:learning**
- **linkedin:learning:course**
- **LinuxAcademy**
- **LiTV**
- **LiveJournal**
- - **LiveLeak**
- - **LiveLeakEmbed**
- **livestream**
- **livestream:original**
- **LnkGo**
@@ -482,6 +489,7 @@
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
+ - **MaoriTV**
- **Markiza**
- **MarkizaPage**
- **massengeschmack.tv**
@@ -506,6 +514,9 @@
- **Mgoon**
- **MGTV**: 芒果TV
- **MiaoPai**
+ - **minds**
+ - **minds:channel**
+ - **minds:group**
- **MinistryGrid**
- **Minoto**
- **miomio.tv**
@@ -514,6 +525,7 @@
- **mixcloud:playlist**
- **mixcloud:user**
- **MLB**
+ - **MLBVideo**
- **Mnet**
- **MNetTV**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
@@ -535,6 +547,7 @@
- **mtv:video**
- **mtvjapan**
- **mtvservices:embedded**
+ - **MTVUutisetArticle**
- **MuenchenTV**: münchen.tv
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
@@ -668,12 +681,14 @@
- **OutsideTV**
- **PacktPub**
- **PacktPubCourse**
+ - **PalcoMP3:artist**
+ - **PalcoMP3:song**
+ - **PalcoMP3:video**
- **pandora.tv**: 판도라TV
- **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- - **pcmag**
- **PearVideo**
- **PeerTube**
- **People**
@@ -695,6 +710,7 @@
- **play.fm**
- **player.sky.it**
- **PlayPlusTV**
+ - **PlayStuff**
- **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
@@ -800,6 +816,7 @@
- **safari:course**: safaribooksonline.com online courses
- **SAKTV**
- **SaltTV**
+ - **SampleFocus**
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@@ -822,6 +839,9 @@
- **ShahidShow**
- **Shared**: shared.sx
- **ShowRoomLive**
+ - **simplecast**
+ - **simplecast:episode**
+ - **simplecast:podcast**
- **Sina**
- **sky.it**
- **sky:news**
@@ -859,6 +879,8 @@
- **Sport5**
- **SportBox**
- **SportDeutschland**
+ - **spotify**
+ - **spotify:show**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
@@ -872,6 +894,9 @@
- **Steam**
- **Stitcher**
- **StitcherShow**
+ - **StoryFire**
+ - **StoryFireSeries**
+ - **StoryFireUser**
- **Streamable**
- **streamcloud.eu**
- **StreamCZ**
@@ -940,12 +965,13 @@
- **TNAFlixNetworkEmbed**
- **toggle**
- **ToonGoggles**
- - **Tosh**: Tosh.0
- **tou.tv**
- **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
+ - **Trovo**
+ - **TrovoVod**
- **TruNews**
- **TruTV**
- **Tube8**
@@ -1039,6 +1065,7 @@
- **Vidbit**
- **Viddler**
- **Videa**
+ - **video.arnes.si**: Arnes Video
- **video.google:search**: Google Video search
- **video.sky.it**
- **video.sky.it:live**
@@ -1053,7 +1080,6 @@
- **vidme**
- **vidme:user**
- **vidme:user:likes**
- - **Vidzi**
- **vier**: vier.be and vijf.be
- **vier:videos**
- **viewlift**
@@ -1098,6 +1124,7 @@
- **vrv**
- **vrv:series**
- **VShare**
+ - **VTM**
- **VTXTV**
- **vube**: Vube.com
- **VuClip**
@@ -1133,7 +1160,7 @@
- **WWE**
- **XBef**
- **XboxClips**
- - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
+ - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
- **XHamster**
- **XHamsterEmbed**
- **XHamsterUser**
@@ -1192,5 +1219,8 @@
- **ZattooLive**
- **ZDF**
- **ZDFChannel**
+ - **Zhihu**
- **zingmp3**: mp3.zing.vn
+ - **zingmp3:album**
+ - **zoom**
- **Zype**
diff --git a/test/helper.py b/test/helper.py
index e62aab11e..6f2129eff 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,22 +1,24 @@
from __future__ import unicode_literals
import errno
-import io
import hashlib
import json
import os.path
import re
-import types
import ssl
import sys
+import types
+import unittest
import youtube_dl.extractor
from youtube_dl import YoutubeDL
from youtube_dl.compat import (
+ compat_open as open,
compat_os_name,
compat_str,
)
from youtube_dl.utils import (
+ IDENTITY,
preferredencoding,
write_string,
)
@@ -27,10 +29,10 @@ def get_params(override=None):
"parameters.json")
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"local_parameters.json")
- with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+ with open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
if os.path.exists(LOCAL_PARAMETERS_FILE):
- with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+ with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
parameters.update(json.load(pf))
if override:
parameters.update(override)
@@ -72,7 +74,8 @@ class FakeYDL(YoutubeDL):
def to_screen(self, s, skip_eol=None):
print(s)
- def trouble(self, s, tb=None):
+ def trouble(self, *args, **kwargs):
+ s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message')
raise Exception(s)
def download(self, x):
@@ -89,6 +92,17 @@ class FakeYDL(YoutubeDL):
self.report_warning = types.MethodType(report_warning, self)
+class FakeLogger(object):
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
def gettestcases(include_onlymatching=False):
for ie in youtube_dl.extractor.gen_extractors():
for tc in ie.get_testcases(include_onlymatching):
@@ -128,6 +142,12 @@ def expect_value(self, got, expected, field):
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (field, got, contains_str))
+ elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
+ fn = eval(expected)
+ suite = expected.split(':', 1)[1].strip()
+ self.assertTrue(
+ fn(got),
+ 'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
elif isinstance(expected, type):
self.assertTrue(
isinstance(got, expected),
@@ -137,7 +157,7 @@ def expect_value(self, got, expected, field):
elif isinstance(expected, list) and isinstance(got, list):
self.assertEqual(
len(expected), len(got),
- 'Expect a list of length %d, but got a list of length %d for field %s' % (
+ 'Expected a list of length %d, but got a list of length %d for field %s' % (
len(expected), len(got), field))
for index, (item_got, item_expected) in enumerate(zip(got, expected)):
type_got = type(item_got)
@@ -161,18 +181,18 @@ def expect_value(self, got, expected, field):
op, _, expected_num = expected.partition(':')
expected_num = int(expected_num)
if op == 'mincount':
- assert_func = assertGreaterEqual
+ assert_func = self.assertGreaterEqual
msg_tmpl = 'Expected %d items in field %s, but only got %d'
elif op == 'maxcount':
- assert_func = assertLessEqual
+ assert_func = self.assertLessEqual
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
elif op == 'count':
- assert_func = assertEqual
+ assert_func = self.assertEqual
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
else:
assert False
assert_func(
- self, len(got), expected_num,
+ len(got), expected_num,
msg_tmpl % (expected_num, field, len(got)))
return
self.assertEqual(
@@ -242,27 +262,6 @@ def assertRegexpMatches(self, text, regexp, msg=None):
self.assertTrue(m, msg)
-def assertGreaterEqual(self, got, expected, msg=None):
- if not (got >= expected):
- if msg is None:
- msg = '%r not greater than or equal to %r' % (got, expected)
- self.assertTrue(got >= expected, msg)
-
-
-def assertLessEqual(self, got, expected, msg=None):
- if not (got <= expected):
- if msg is None:
- msg = '%r not less than or equal to %r' % (got, expected)
- self.assertTrue(got <= expected, msg)
-
-
-def assertEqual(self, got, expected, msg=None):
- if not (got == expected):
- if msg is None:
- msg = '%r not equal to %r' % (got, expected)
- self.assertTrue(got == expected, msg)
-
-
def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning
@@ -280,3 +279,7 @@ def http_server_port(httpd):
else:
sock = httpd.socket
return sock.getsockname()[1]
+
+
+def expectedFailureIf(cond):
+ return unittest.expectedFailure if cond else IDENTITY
diff --git a/test/parameters.json b/test/parameters.json
index 65fd54428..864c9d130 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -18,7 +18,6 @@
"noprogress": false,
"outtmpl": "%(id)s.%(ext)s",
"password": null,
- "playlistend": -1,
"playliststart": 1,
"prefer_free_formats": false,
"quiet": false,
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index dd69a681b..09100a1d6 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -3,19 +3,37 @@
from __future__ import unicode_literals
# Allow direct execution
-import io
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from youtube_dl.compat import compat_etree_fromstring, compat_http_server
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import YoutubeIE, get_info_extractor
-from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
import threading
+from test.helper import (
+ expect_dict,
+ expect_value,
+ FakeYDL,
+ http_server_port,
+)
+from youtube_dl.compat import (
+ compat_etree_fromstring,
+ compat_http_server,
+ compat_open as open,
+)
+from youtube_dl.extractor.common import InfoExtractor
+from youtube_dl.extractor import (
+ get_info_extractor,
+ YoutubeIE,
+)
+from youtube_dl.utils import (
+ encode_data_uri,
+ ExtractorError,
+ RegexNotFoundError,
+ strip_jsonp,
+)
+
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "418 I'm a teapot
"
@@ -35,13 +53,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
assert False
-class TestIE(InfoExtractor):
+class DummyIE(InfoExtractor):
pass
class TestInfoExtractor(unittest.TestCase):
def setUp(self):
- self.ie = TestIE(FakeYDL())
+ self.ie = DummyIE(FakeYDL())
def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
@@ -62,6 +80,7 @@ class TestInfoExtractor(unittest.TestCase):
+
'''
self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -74,6 +93,7 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+ self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
def test_html_search_meta(self):
ie = self.ie
@@ -98,6 +118,74 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+ def test_search_nextjs_data(self):
+ html = '''
+
+
+
+
+
+ Test _search_nextjs_data()
+
+
+
+
+
+
+'''
+ search = self.ie._search_nextjs_data(html, 'testID')
+ self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
+ search = self.ie._search_nextjs_data(
+ 'no next.js data here, move along', 'testID', default={'status': 0})
+ self.assertEqual(search['status'], 0)
+
+ def test_search_nuxt_data(self):
+ html = '''
+
+
+
+
+ Nuxt.js Test Page
+
+
+
+
+
+
+
+
+
+'''
+ search = self.ie._search_nuxt_data(html, 'testID')
+ self.assertEqual(search['track']['id'], 'testid')
+
def test_search_json_ld_realworld(self):
# https://github.com/ytdl-org/youtube-dl/issues/23306
expect_dict(
@@ -346,6 +434,24 @@ class TestInfoExtractor(unittest.TestCase):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -799,8 +905,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+ mode='r', encoding='utf-8') as f:
formats = self.ie._parse_m3u8_formats(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
@@ -890,7 +996,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 5997.485,
'width': 1920,
'height': 1080,
- }]
+ }],
+ {},
), (
# https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only',
@@ -973,7 +1080,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 4400,
'width': 1920,
'height': 1080,
- }]
+ }],
+ {},
), (
# https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains
@@ -1019,18 +1127,185 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 360,
'height': 360,
'fps': 30,
- }]
+ }],
+ {},
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/30235
+ # Bento4 generated test mpd
+ # mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
+ 'url_and_range',
+ 'http://unknown/manifest.mpd', # mpd_url
+ 'http://unknown/', # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'fragment_base_url': 'http://unknown/',
+ 'ext': 'm4a',
+ 'format_id': 'audio-und-mp4a.40.2',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 98.808,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'fragment_base_url': 'http://unknown/',
+ 'ext': 'mp4',
+ 'format_id': 'video-avc1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4D401E',
+ 'tbr': 699.597,
+ 'width': 768,
+ 'height': 432
+ }],
+ {},
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/27575
+ # GPAC generated test mpd
+ # MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
+ 'range_only',
+ 'http://unknown/manifest.mpd', # mpd_url
+ 'http://unknown/', # mpd_base_url
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
+ 'ext': 'm4a',
+ 'format_id': '2',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 98.096,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'fragment_base_url': 'http://unknown/video_dashinit.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4D401E',
+ 'tbr': 526.987,
+ 'width': 768,
+ 'height': 432
+ }],
+ {},
+ ), (
+ 'subtitles',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+ [{
+ 'format_id': 'audio=128001',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'm4a',
+ 'tbr': 128.001,
+ 'asr': 48000,
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'vcodec': 'none',
+ 'acodec': 'mp4a.40.2',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }, {
+ 'format_id': 'video=100000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 336,
+ 'height': 144,
+ 'tbr': 100,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }, {
+ 'format_id': 'video=326000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 562,
+ 'height': 240,
+ 'tbr': 326,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }, {
+ 'format_id': 'video=698000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 844,
+ 'height': 360,
+ 'tbr': 698,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }, {
+ 'format_id': 'video=1493000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1126,
+ 'height': 480,
+ 'tbr': 1493,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }, {
+ 'format_id': 'video=4482000',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'ext': 'mp4',
+ 'width': 1688,
+ 'height': 720,
+ 'tbr': 4482,
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'vcodec': 'avc1.4D401F',
+ 'acodec': 'none',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }],
+ {
+ 'en': [
+ {
+ 'ext': 'mp4',
+ 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+ 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
+ 'protocol': 'http_dash_segments',
+ }
+ ]
+ },
)
]
- for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
- mode='r', encoding='utf-8') as f:
- formats = self.ie._parse_mpd_formats(
+ for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
+ with open('./test/testdata/mpd/%s.mpd' % mpd_file,
+ mode='r', encoding='utf-8') as f:
+ formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
compat_etree_fromstring(f.read().encode('utf-8')),
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ expect_value(self, subtitles, expected_subtitles, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
@@ -1051,8 +1326,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/f4m/%s.f4m' % f4m_file,
+ mode='r', encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
f4m_url, None)
@@ -1099,8 +1374,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
]
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
- with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/xspf/%s.xspf' % xspf_file,
+ mode='r', encoding='utf-8') as f:
entries = self.ie._parse_xspf(
compat_etree_fromstring(f.read().encode('utf-8')),
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 4d62ba145..d994682b2 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -10,14 +10,31 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import copy
+import json
-from test.helper import FakeYDL, assertRegexpMatches
+from test.helper import (
+ FakeYDL,
+ assertRegexpMatches,
+ try_rm,
+)
from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_str, compat_urllib_error
+from youtube_dl.compat import (
+ compat_http_cookiejar_Cookie,
+ compat_http_cookies_SimpleCookie,
+ compat_kwargs,
+ compat_open as open,
+ compat_str,
+ compat_urllib_error,
+)
+
from youtube_dl.extractor import YoutubeIE
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.postprocessor.common import PostProcessor
-from youtube_dl.utils import ExtractorError, match_filter_func
+from youtube_dl.utils import (
+ ExtractorError,
+ match_filter_func,
+ traverse_obj,
+)
TEST_URL = 'http://localhost/sample.mp4'
@@ -29,11 +46,14 @@ class YDL(FakeYDL):
self.msgs = []
def process_info(self, info_dict):
- self.downloaded_info_dicts.append(info_dict)
+ self.downloaded_info_dicts.append(info_dict.copy())
def to_screen(self, msg):
self.msgs.append(msg)
+ def dl(self, *args, **kwargs):
+ assert False, 'Downloader must not be invoked for test_YoutubeDL'
+
def _make_result(formats, **kwargs):
res = {
@@ -42,8 +62,9 @@ def _make_result(formats, **kwargs):
'title': 'testttitle',
'extractor': 'testex',
'extractor_key': 'TestEx',
+ 'webpage_url': 'http://example.com/watch?v=shenanigans',
}
- res.update(**kwargs)
+ res.update(**compat_kwargs(kwargs))
return res
@@ -633,13 +654,20 @@ class TestYoutubeDL(unittest.TestCase):
'title2': '%PATH%',
}
- def fname(templ):
- ydl = YoutubeDL({'outtmpl': templ})
+ def fname(templ, na_placeholder='NA'):
+ params = {'outtmpl': templ}
+ if na_placeholder != 'NA':
+ params['outtmpl_na_placeholder'] = na_placeholder
+ ydl = YoutubeDL(params)
return ydl.prepare_filename(info)
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
- # Replace missing fields with 'NA'
- self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+ NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
+ # Replace missing fields with 'NA' by default
+ self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
+ # Or by provided placeholder
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
+ self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
@@ -674,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase):
class SimplePP(PostProcessor):
def run(self, info):
- with open(audiofile, 'wt') as f:
+ with open(audiofile, 'w') as f:
f.write('EXAMPLE')
return [info['filepath']], info
def run_pp(params, PP):
- with open(filename, 'wt') as f:
+ with open(filename, 'w') as f:
f.write('EXAMPLE')
ydl = YoutubeDL(params)
ydl.add_post_processor(PP())
@@ -698,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase):
class ModifierPP(PostProcessor):
def run(self, info):
- with open(info['filepath'], 'wt') as f:
+ with open(info['filepath'], 'w') as f:
f.write('MODIFIED')
return [], info
@@ -923,17 +951,11 @@ class TestYoutubeDL(unittest.TestCase):
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
- class _YDL(YDL):
- def __init__(self, *args, **kwargs):
- super(_YDL, self).__init__(*args, **kwargs)
-
- def trouble(self, s, tb=None):
- pass
-
- ydl = _YDL({
+ ydl = YDL({
'format': 'extra',
'ignoreerrors': True,
})
+ ydl.trouble = lambda *_, **__: None
class VideoIE(InfoExtractor):
_VALID_URL = r'video:(?P\d+)'
@@ -990,6 +1012,180 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video')
+ def test_default_times(self):
+ """Test addition of missing upload/release/_date from /release_/timestamp"""
+ info = {
+ 'id': '1234',
+ 'url': TEST_URL,
+ 'title': 'Title',
+ 'ext': 'mp4',
+ 'timestamp': 1631352900,
+ 'release_timestamp': 1632995931,
+ }
+
+ params = {'simulate': True, }
+ ydl = FakeYDL(params)
+ out_info = ydl.process_ie_result(info)
+ self.assertTrue(isinstance(out_info['upload_date'], compat_str))
+ self.assertEqual(out_info['upload_date'], '20210911')
+ self.assertTrue(isinstance(out_info['release_date'], compat_str))
+ self.assertEqual(out_info['release_date'], '20210930')
+
+
+class TestYoutubeDLCookies(unittest.TestCase):
+
+ @staticmethod
+ def encode_cookie(cookie):
+ if not isinstance(cookie, dict):
+ cookie = vars(cookie)
+ for name, value in cookie.items():
+ yield name, compat_str(value)
+
+ @classmethod
+ def comparable_cookies(cls, cookies):
+ # Work around cookiejar cookies not being unicode strings
+ return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies))))
+
+ def assertSameCookies(self, c1, c2, msg=None):
+ return self.assertEqual(
+ *map(self.comparable_cookies, (c1, c2)),
+ msg=msg)
+
+ def assertSameCookieStrings(self, c1, c2, msg=None):
+ return self.assertSameCookies(
+ *map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)),
+ msg=msg)
+
+ def test_header_cookies(self):
+
+ ydl = FakeYDL()
+ ydl.report_warning = lambda *_, **__: None
+
+ def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+ return compat_http_cookiejar_Cookie(
+ version or 0, name, value, None, False,
+ domain, bool(domain), bool(domain), path, bool(path),
+ secure, expires, False, None, None, rest={})
+
+ test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s'))
+
+ def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None):
+ def _test():
+ ydl.cookiejar.clear()
+ ydl._load_cookies(encoded_cookies, autoscope=headers)
+ if headers:
+ ydl._apply_header_cookies(test_url)
+ data = {'url': test_url}
+ ydl._calc_headers(data)
+ self.assertSameCookies(
+ cookies, ydl.cookiejar,
+ 'Extracted cookiejar.Cookie is not the same')
+ if not headers:
+ self.assertSameCookieStrings(
+ data.get('cookies'), round_trip or encoded_cookies,
+ msg='Cookie is not the same as round trip')
+ ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+ try:
+ _test()
+ except AssertionError:
+ raise
+ except Exception as e:
+ if not error_re:
+ raise
+ assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2))
+
+ test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)])
+ test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed')
+ test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [
+ cookie('cookie1', 'value1', domain=test_domain, path='/test'),
+ cookie('cookie2', 'value2', domain=test_domain, path='/')])
+ cookie_kw = compat_kwargs(
+ {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', })
+ test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [
+ cookie('test', 'value', **cookie_kw)])
+ test('test="value; "; path=/test; domain=' + test_domain, [
+ cookie('test', 'value; ', domain=test_domain, path='/test')],
+ round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain))
+ test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)],
+ round_trip='name=""; Domain=' + test_domain)
+ test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True)
+ test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [],
+ headers=True, error_re='Invalid syntax')
+ ydl.report_warning = ydl.report_error
+ test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk')
+
+ def test_infojson_cookies(self):
+ TEST_FILE = 'test_infojson_cookies.info.json'
+ TEST_URL = 'https://example.com/example.mp4'
+ COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+ COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+ ydl = FakeYDL()
+ ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+ def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+ fmt = {'url': TEST_URL}
+ if fmts_header_cookies:
+ fmt['http_headers'] = COOKIE_HEADER
+ if cookies_field:
+ fmt['cookies'] = COOKIES
+ return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+ def test(initial_info, note):
+
+ def failure_msg(why):
+ return ' when '.join((why, note))
+
+ result = {}
+ result['processed'] = ydl.process_ie_result(initial_info)
+ self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+ msg=failure_msg('No cookies set in cookiejar after initial process'))
+ ydl.cookiejar.clear()
+ with open(TEST_FILE) as infojson:
+ result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+ result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+ self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+ msg=failure_msg('No cookies set in cookiejar after final process'))
+ ydl.cookiejar.clear()
+ for key in ('processed', 'loaded', 'final'):
+ info = result[key]
+ self.assertIsNone(
+ traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+ msg=failure_msg('Cookie header not removed in {0} result'.format(key)))
+ self.assertSameCookieStrings(
+ traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+ msg=failure_msg('No cookies field found in {0} result'.format(key)))
+
+ test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+ test(make_info(info_header_cookies=True), 'info_dict header cokies')
+ test(make_info(fmts_header_cookies=True), 'format header cookies')
+ test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+ test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+ test(make_info(cookies_field=True), 'cookies format field')
+ test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+ try_rm(TEST_FILE)
+
+ def test_add_headers_cookie(self):
+ def check_for_cookie_header(result):
+ return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+ ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+ ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com
+
+ fmt = {'url': 'https://example.com/video.mp4'}
+ result = ydl.process_ie_result(_make_result([fmt]), download=False)
+ self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+ self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+ self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+ fmt = {'url': 'https://wrong.com/video.mp4'}
+ result = ydl.process_ie_result(_make_result([fmt]), download=False)
+ self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+ self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+ self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 05f48bd74..4f9dd71ae 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
# will be ignored
self.assertFalse(cookiejar._cookies)
+ def test_get_cookie_header(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+ self.assertIn('HTTPONLY_COOKIE', header)
+
+ def test_get_cookies_for_url(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
+ self.assertEqual(len(cookies), 2)
+ cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
+ self.assertFalse(cookies)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
index cc89fb6ab..0f181466b 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@@ -58,6 +58,13 @@ class TestAES(unittest.TestCase):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
+ def test_ecb_encrypt(self):
+ data = bytes_to_intlist(self.secret_msg)
+ encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
+ self.assertEqual(
+ encrypted,
+ b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 6f5513faa..db98494ab 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -11,6 +11,7 @@ from test.helper import try_rm
from youtube_dl import YoutubeDL
+from youtube_dl.utils import DownloadError
def _download_restricted(url, filename, age):
@@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
ydl.add_default_info_extractors()
json_filename = os.path.splitext(filename)[0] + '.info.json'
try_rm(json_filename)
- ydl.download([url])
+ try:
+ ydl.download([url])
+ except DownloadError:
+ try_rm(json_filename)
res = os.path.exists(json_filename)
try_rm(json_filename)
return res
@@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
self.assertFalse(_download_restricted(url, filename, age))
def test_youtube(self):
- self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+ self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
def test_youporn(self):
self._assert_restricted(
- 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
- '505835.mp4', 2, old_age=25)
+ 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
+ '16715086.mp4', 2, old_age=25)
if __name__ == '__main__':
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index df6d81b5d..26df356b4 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -66,18 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
- # def test_youtube_search_matching(self):
- # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
- # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
-
- def test_youtube_extract(self):
- assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
- assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
- assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
- assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
diff --git a/test/test_cache.py b/test/test_cache.py
index a16160142..931074aa1 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -3,17 +3,18 @@
from __future__ import unicode_literals
-import shutil
-
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import shutil
from test.helper import FakeYDL
from youtube_dl.cache import Cache
+from youtube_dl.utils import version_tuple
+from youtube_dl.version import __version__
def _is_empty(d):
@@ -54,6 +55,17 @@ class TestCache(unittest.TestCase):
self.assertFalse(os.path.exists(self.test_dir))
self.assertEqual(c.load('test_cache', 'k.'), None)
+ def test_cache_validation(self):
+ ydl = FakeYDL({
+ 'cachedir': self.test_dir,
+ })
+ c = Cache(ydl)
+ obj = {'x': 1, 'y': ['ä', '\\a', True]}
+ c.store('test_cache', 'k.', obj)
+ self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
+ new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
+ self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_compat.py b/test/test_compat.py
index 86ff389fd..b83c8cb41 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
+ compat_casefold,
compat_getenv,
compat_setenv,
compat_etree_Element,
@@ -22,6 +23,7 @@ from youtube_dl.compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
+ compat_urllib_request,
)
@@ -47,10 +49,11 @@ class TestCompat(unittest.TestCase):
def test_all_present(self):
import youtube_dl.compat
- all_names = youtube_dl.compat.__all__
- present_names = set(filter(
+ all_names = sorted(
+ youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
+ present_names = set(map(compat_str, filter(
lambda c: '_' in c and not c.startswith('_'),
- dir(youtube_dl.compat))) - set(['unicode_literals'])
+ dir(youtube_dl.compat)))) - set(['unicode_literals'])
self.assertEqual(all_names, sorted(present_names))
def test_compat_urllib_parse_unquote(self):
@@ -118,9 +121,34 @@ class TestCompat(unittest.TestCase):
'''
compat_etree_fromstring(xml)
- def test_struct_unpack(self):
+ def test_compat_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
+ def test_compat_casefold(self):
+ if hasattr(compat_str, 'casefold'):
+ # don't bother to test str.casefold() (again)
+ return
+ # thanks https://bugs.python.org/file24232/casefolding.patch
+ self.assertEqual(compat_casefold('hello'), 'hello')
+ self.assertEqual(compat_casefold('hELlo'), 'hello')
+ self.assertEqual(compat_casefold('ß'), 'ss')
+ self.assertEqual(compat_casefold('fi'), 'fi')
+ self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
+ self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
+
+ def test_compat_urllib_request_Request(self):
+ self.assertEqual(
+ compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(),
+ 'PUT')
+
+ class PUTrequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'PUT'
+
+ self.assertEqual(
+ PUTrequest('http://127.0.0.1').get_method(),
+ 'PUT')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_download.py b/test/test_download.py
index ebe820dfc..f7d6a23bc 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -9,7 +9,6 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
- assertGreaterEqual,
expect_warnings,
get_params,
gettestcases,
@@ -20,26 +19,35 @@ from test.helper import (
import hashlib
-import io
import json
import socket
import youtube_dl.YoutubeDL
from youtube_dl.compat import (
compat_http_client,
- compat_urllib_error,
compat_HTTPError,
+ compat_open as open,
+ compat_urllib_error,
)
from youtube_dl.utils import (
DownloadError,
ExtractorError,
+ error_to_compat_str,
format_bytes,
+ IDENTITY,
+ preferredencoding,
UnavailableVideoError,
)
from youtube_dl.extractor import get_info_extractor
RETRIES = 3
+# Some unittest APIs require actual str
+if not isinstance('TEST', str):
+ _encode_str = lambda s: s.encode(preferredencoding())
+else:
+ _encode_str = IDENTITY
+
class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
@@ -100,27 +108,31 @@ def generator(test_case, tname):
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
+ self.skipTest(_encode_str(reason))
+
if not ie.working():
print_skipping('IE marked as not _WORKING')
- return
for tc in test_cases:
info_dict = tc.get('info_dict', {})
if not (info_dict.get('id') and info_dict.get('ext')):
- raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+ raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, ))
if 'skip' in test_case:
print_skipping(test_case['skip'])
- return
+
for other_ie in other_ies:
if not other_ie.working():
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
- return
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
+ params.setdefault('playlistend',
+ test_case['playlist_maxcount'] + 1
+ if test_case.get('playlist_maxcount')
+ else test_case.get('playlist_mincount'))
params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False)
@@ -146,6 +158,7 @@ def generator(test_case, tname):
try_rm(tc_filename)
try_rm(tc_filename + '.part')
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
+
try_rm_tcs_files()
try:
try_num = 1
@@ -160,7 +173,9 @@ def generator(test_case, tname):
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
- raise
+ msg = getattr(err, 'msg', error_to_compat_str(err))
+ err.msg = '%s (%s)' % (msg, tname, )
+ raise err
if try_num == RETRIES:
report_warning('%s failed due to network errors, skipping...' % tname)
@@ -178,13 +193,19 @@ def generator(test_case, tname):
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
if 'playlist_mincount' in test_case:
- assertGreaterEqual(
- self,
+ self.assertGreaterEqual(
len(res_dict['entries']),
test_case['playlist_mincount'],
'Expected at least %d in playlist %s, but got only %d' % (
test_case['playlist_mincount'], test_case['url'],
len(res_dict['entries'])))
+ if 'playlist_maxcount' in test_case:
+ self.assertLessEqual(
+ len(res_dict['entries']),
+ test_case['playlist_maxcount'],
+ 'Expected at most %d in playlist %s, but got %d' % (
+ test_case['playlist_maxcount'], test_case['url'],
+ len(res_dict['entries'])))
if 'playlist_count' in test_case:
self.assertEqual(
len(res_dict['entries']),
@@ -209,7 +230,15 @@ def generator(test_case, tname):
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
# Now, check downloaded file consistency
+ # support test-case with volatile ID, signalled by regexp value
+ if tc.get('info_dict', {}).get('id', '').startswith('re:'):
+ test_id = tc['info_dict']['id']
+ tc['info_dict']['id'] = tc_res_dict['id']
+ else:
+ test_id = None
tc_filename = get_tc_filename(tc)
+ if test_id:
+ tc['info_dict']['id'] = test_id
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc_filename in finished_hook_called)
@@ -218,8 +247,8 @@ def generator(test_case, tname):
if params.get('test'):
expected_minsize = max(expected_minsize, 10000)
got_fsize = os.path.getsize(tc_filename)
- assertGreaterEqual(
- self, got_fsize, expected_minsize,
+ self.assertGreaterEqual(
+ got_fsize, expected_minsize,
'Expected %s to be at least %s, but it\'s only %s ' %
(tc_filename, format_bytes(expected_minsize),
format_bytes(got_fsize)))
@@ -232,7 +261,7 @@ def generator(test_case, tname):
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
- with io.open(info_json_fn, encoding='utf-8') as infof:
+ with open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py
new file mode 100644
index 000000000..4491bd9de
--- /dev/null
+++ b/test/test_downloader_external.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import re
+import sys
+import subprocess
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+ FakeLogger,
+ FakeYDL,
+ http_server_port,
+ try_rm,
+)
+from youtube_dl import YoutubeDL
+from youtube_dl.compat import (
+ compat_contextlib_suppress,
+ compat_http_cookiejar_Cookie,
+ compat_http_server,
+ compat_kwargs,
+)
+from youtube_dl.utils import (
+ encodeFilename,
+ join_nonempty,
+)
+from youtube_dl.downloader.external import (
+ Aria2cFD,
+ Aria2pFD,
+ AxelFD,
+ CurlFD,
+ FFmpegFD,
+ HttpieFD,
+ WgetFD,
+)
+from youtube_dl.postprocessor import (
+ FFmpegPostProcessor,
+)
+import threading
+
+TEST_SIZE = 10 * 1024
+
+TEST_COOKIE = {
+ 'version': 0,
+ 'name': 'test',
+ 'value': 'ytdlp',
+ 'port': None,
+ 'port_specified': False,
+ 'domain': '.example.com',
+ 'domain_specified': True,
+ 'domain_initial_dot': False,
+ 'path': '/',
+ 'path_specified': True,
+ 'secure': False,
+ 'expires': None,
+ 'discard': False,
+ 'comment': None,
+ 'comment_url': None,
+ 'rest': {},
+}
+
+TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)
+
+TEST_INFO = {'url': 'http://www.example.com/'}
+
+
+def cookiejar_Cookie(**cookie_args):
+ return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args))
+
+
+def ifExternalFDAvailable(externalFD):
+ return unittest.skipUnless(externalFD.available(),
+ externalFD.get_basename() + ' not found')
+
+
+class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def send_content_range(self, total=None):
+ range_header = self.headers.get('Range')
+ start = end = None
+ if range_header:
+ mobj = re.match(r'bytes=(\d+)-(\d+)', range_header)
+ if mobj:
+ start, end = (int(mobj.group(i)) for i in (1, 2))
+ valid_range = start is not None and end is not None
+ if valid_range:
+ content_range = 'bytes %d-%d' % (start, end)
+ if total:
+ content_range += '/%d' % total
+ self.send_header('Content-Range', content_range)
+ return (end - start + 1) if valid_range else total
+
+ def serve(self, range=True, content_length=True):
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ size = TEST_SIZE
+ if range:
+ size = self.send_content_range(TEST_SIZE)
+ if content_length:
+ self.send_header('Content-Length', size)
+ self.end_headers()
+ self.wfile.write(b'#' * size)
+
+ def do_GET(self):
+ if self.path == '/regular':
+ self.serve()
+ elif self.path == '/no-content-length':
+ self.serve(content_length=False)
+ elif self.path == '/no-range':
+ self.serve(range=False)
+ elif self.path == '/no-range-no-content-length':
+ self.serve(range=False, content_length=False)
+ else:
+ assert False, 'unrecognised server path'
+
+
+@ifExternalFDAvailable(Aria2pFD)
+class TestAria2pFD(unittest.TestCase):
+ def setUp(self):
+ self.httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def download(self, params, ep):
+ with subprocess.Popen(
+ ['aria2c', '--enable-rpc'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL
+ ) as process:
+ if not process.poll():
+ filename = 'testfile.mp4'
+ params['logger'] = FakeLogger()
+ params['outtmpl'] = filename
+ ydl = YoutubeDL(params)
+ try_rm(encodeFilename(filename))
+ self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0)
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+ try_rm(encodeFilename(filename))
+ process.kill()
+
+ def download_all(self, params):
+ for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
+ self.download(params, ep)
+
+ def test_regular(self):
+ self.download_all({'external_downloader': 'aria2p'})
+
+ def test_chunked(self):
+ self.download_all({
+ 'external_downloader': 'aria2p',
+ 'http_chunk_size': 1000,
+ })
+
+
+@ifExternalFDAvailable(HttpieFD)
+class TestHttpieFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = HttpieFD(ydl, {})
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['http', '--download', '--output', 'test', 'http://www.example.com/'])
+
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['http', '--download', '--output', 'test',
+ 'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE])
+
+
+@ifExternalFDAvailable(AxelFD)
+class TestAxelFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = AxelFD(ydl, {})
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['axel', '-o', 'test', '--', 'http://www.example.com/'])
+
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ self.assertEqual(
+ downloader._make_cmd('test', TEST_INFO),
+ ['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE,
+ '--max-redirect=0', '--', 'http://www.example.com/'])
+
+
+@ifExternalFDAvailable(WgetFD)
+class TestWgetFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = WgetFD(ydl, {})
+ self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+ # Test cookiejar tempfile arg is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(CurlFD)
+class TestCurlFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = CurlFD(ydl, {})
+ self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+ # Test cookie header is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
+ self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO))
+
+
+@ifExternalFDAvailable(Aria2cFD)
+class TestAria2cFD(unittest.TestCase):
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = Aria2cFD(ydl, {})
+ downloader._make_cmd('test', TEST_INFO)
+ self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
+
+ # Test cookiejar tempfile arg is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ cmd = downloader._make_cmd('test', TEST_INFO)
+ self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
+
+
+# Handle delegated availability
+def ifFFmpegFDAvailable(externalFD):
+ # raise SkipTest, or set False!
+ avail = ifExternalFDAvailable(externalFD) and False
+ with compat_contextlib_suppress(Exception):
+ avail = FFmpegPostProcessor(downloader=None).available
+ return unittest.skipUnless(
+ avail, externalFD.get_basename() + ' not found')
+
+
+@ifFFmpegFDAvailable(FFmpegFD)
+class TestFFmpegFD(unittest.TestCase):
+ _args = []
+
+ def _test_cmd(self, args):
+ self._args = args
+
+ def test_make_cmd(self):
+ with FakeYDL() as ydl:
+ downloader = FFmpegFD(ydl, {})
+ downloader._debug_cmd = self._test_cmd
+ info_dict = TEST_INFO.copy()
+ info_dict['ext'] = 'mp4'
+
+ downloader._call_downloader('test', info_dict)
+ self.assertEqual(self._args, [
+ 'ffmpeg', '-y', '-i', 'http://www.example.com/',
+ '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+ # Test cookies arg is added
+ ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
+ downloader._call_downloader('test', info_dict)
+ self.assertEqual(self._args, [
+ 'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n',
+ '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 750472281..6af86ae48 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,7 +9,11 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import http_server_port, try_rm
+from test.helper import (
+ FakeLogger,
+ http_server_port,
+ try_rm,
+)
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
@@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
assert False
-class FakeLogger(object):
- def debug(self, msg):
- pass
-
- def warning(self, msg):
- pass
-
- def error(self, msg):
- pass
-
-
class TestHttpFD(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
@@ -95,7 +88,7 @@ class TestHttpFD(unittest.TestCase):
self.assertTrue(downloader.real_download(filename, {
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
}))
- self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
+ self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
try_rm(encodeFilename(filename))
def download_all(self, params):
diff --git a/test/test_execution.py b/test/test_execution.py
index 11661bb68..9daaafa6c 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -8,37 +8,55 @@ import unittest
import sys
import os
import subprocess
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from youtube_dl.utils import encodeArgument
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, rootDir)
-try:
- _DEV_NULL = subprocess.DEVNULL
-except AttributeError:
- _DEV_NULL = open(os.devnull, 'wb')
+from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
+from youtube_dl.utils import encodeArgument
+
+compat_register_utf8()
+
+
+_DEV_NULL = compat_subprocess_get_DEVNULL()
class TestExecution(unittest.TestCase):
+ def setUp(self):
+ self.module = 'youtube_dl'
+ if sys.version_info < (2, 7):
+ self.module += '.__main__'
+
def test_import(self):
subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
def test_module_exec(self):
- if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
- subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL)
def test_main_exec(self):
- subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
def test_cmdline_umlauts(self):
+ os.environ['PYTHONIOENCODING'] = 'utf-8'
p = subprocess.Popen(
- [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+ [sys.executable, '-m', self.module, encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
self.assertFalse(stderr)
+ def test_lazy_extractors(self):
+ lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py')
+ try:
+ subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
+ finally:
+ for x in ('', 'c') if sys.version_info[0] < 3 else ('',):
+ try:
+ os.remove(lazy_extractors + x)
+ except OSError:
+ pass
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_http.py b/test/test_http.py
index 3ee0a5dda..485c4c6fc 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,30 +8,163 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import http_server_port
-from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server, compat_urllib_request
+import contextlib
+import gzip
+import io
import ssl
+import tempfile
import threading
+import zlib
+
+# avoid deprecated alias assertRaisesRegexp
+if hasattr(unittest.TestCase, 'assertRaisesRegex'):
+ unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
+
+try:
+ import brotli
+except ImportError:
+ brotli = None
+try:
+ from urllib.request import pathname2url
+except ImportError:
+ from urllib import pathname2url
+
+from youtube_dl.compat import (
+ compat_http_cookiejar_Cookie,
+ compat_http_server,
+ compat_str as str,
+ compat_urllib_error,
+ compat_urllib_HTTPError,
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+
+from youtube_dl.utils import (
+ sanitized_Request,
+ update_Request,
+ urlencode_postdata,
+)
+
+from test.helper import (
+ expectedFailureIf,
+ FakeYDL,
+ FakeLogger,
+ http_server_port,
+)
+from youtube_dl import YoutubeDL
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ protocol_version = 'HTTP/1.1'
+
+ # work-around old/new -style class inheritance
+ def super(self, meth_name, *args, **kwargs):
+ from types import MethodType
+ try:
+ super()
+ fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
+ except TypeError:
+ fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
+ self.super = MethodType(fn, self)
+ return self.super(meth_name, *args, **kwargs)
+
def log_message(self, format, *args):
pass
+ def _headers(self):
+ payload = str(self.headers).encode('utf-8')
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _redirect(self):
+ self.send_response(int(self.path[len('/redirect_'):]))
+ self.send_header('Location', '/method')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+
+ def _method(self, method, payload=None):
+ self.send_response(200)
+ self.send_header('Content-Length', str(len(payload or '')))
+ self.send_header('Method', method)
+ self.end_headers()
+ if payload:
+ self.wfile.write(payload)
+
+ def _status(self, status):
+ payload = '{0} NOT FOUND'.format(status).encode('utf-8')
+ self.send_response(int(status))
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _read_data(self):
+ if 'Content-Length' in self.headers:
+ return self.rfile.read(int(self.headers['Content-Length']))
+
+ def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+ return '{0}://{1}:{2}/{3}'.format(
+ scheme, host,
+ port if port is not None
+ else http_server_port(self.server), path)
+
+ def do_POST(self):
+ data = self._read_data()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('POST', data)
+ elif self.path.startswith('/headers'):
+ self._headers()
+ else:
+ self._status(404)
+
+ def do_HEAD(self):
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('HEAD')
+ else:
+ self._status(404)
+
+ def do_PUT(self):
+ data = self._read_data()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('PUT', data)
+ else:
+ self._status(404)
+
def do_GET(self):
+
+ def respond(payload=b'',
+ payload_type='text/html; charset=utf-8',
+ payload_encoding=None,
+ resp_code=200):
+ self.send_response(resp_code)
+ self.send_header('Content-Type', payload_type)
+ if payload_encoding:
+ self.send_header('Content-Encoding', payload_encoding)
+ self.send_header('Content-Length', str(len(payload))) # required for persistent connections
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def gzip_compress(p):
+ buf = io.BytesIO()
+ with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:
+ f.write(p)
+ return buf.getvalue()
+
if self.path == '/video.html':
- self.send_response(200)
- self.send_header('Content-Type', 'text/html; charset=utf-8')
- self.end_headers()
- self.wfile.write(b'')
+ respond()
elif self.path == '/vid.mp4':
- self.send_response(200)
- self.send_header('Content-Type', 'video/mp4')
- self.end_headers()
- self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+ respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
elif self.path == '/302':
if sys.version_info[0] == 3:
# XXX: Python 3 http server does not allow non-ASCII header values
@@ -39,71 +172,336 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.end_headers()
return
- new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
+ new_url = self._test_url('中文.html')
self.send_response(302)
self.send_header(b'Location', new_url.encode('utf-8'))
self.end_headers()
elif self.path == '/%E4%B8%AD%E6%96%87.html':
- self.send_response(200)
- self.send_header('Content-Type', 'text/html; charset=utf-8')
+ respond()
+ elif self.path == '/%c7%9f':
+ respond()
+ elif self.path == '/redirect_dotsegments':
+ self.send_response(301)
+ # redirect to /headers but with dot segments before
+ self.send_header('Location', '/a/b/./../../headers')
+ self.send_header('Content-Length', '0')
self.end_headers()
- self.wfile.write(b'')
+ elif self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('GET')
+ elif self.path.startswith('/headers'):
+ self._headers()
+ elif self.path.startswith('/308-to-headers'):
+ self.send_response(308)
+ self.send_header('Location', '/headers')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/trailing_garbage':
+ payload = b''
+ compressed = gzip_compress(payload) + b'trailing garbage'
+ respond(compressed, payload_encoding='gzip')
+ elif self.path == '/302-non-ascii-redirect':
+ new_url = self._test_url('中文.html')
+ # actually respond with permanent redirect
+ self.send_response(301)
+ self.send_header('Location', new_url)
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/content-encoding':
+ encodings = self.headers.get('ytdl-encoding', '')
+ payload = b''
+ for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+ if encoding == 'br' and brotli:
+ payload = brotli.compress(payload)
+ elif encoding == 'gzip':
+ payload = gzip_compress(payload)
+ elif encoding == 'deflate':
+ payload = zlib.compress(payload)
+ elif encoding == 'unsupported':
+ payload = b'raw'
+ break
+ else:
+ self._status(415)
+ return
+ respond(payload, payload_encoding=encodings)
+
else:
- assert False
+ self._status(404)
+ def send_header(self, keyword, value):
+ """
+ Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
+ This is against what is defined in RFC 3986: but we need to test that we support this
+ since some sites incorrectly do this.
+ """
+ if keyword.lower() == 'connection':
+ return self.super('send_header', keyword, value)
-class FakeLogger(object):
- def debug(self, msg):
- pass
+ if not hasattr(self, '_headers_buffer'):
+ self._headers_buffer = []
- def warning(self, msg):
- pass
+ self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
- def error(self, msg):
- pass
+ def end_headers(self):
+ if hasattr(self, '_headers_buffer'):
+ self.wfile.write(b''.join(self._headers_buffer))
+ self._headers_buffer = []
+ self.super('end_headers')
class TestHTTP(unittest.TestCase):
+ # when does it make sense to check the SSL certificate?
+ _check_cert = (
+ sys.version_info >= (3, 2)
+ or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))
+
def setUp(self):
- self.httpd = compat_http_server.HTTPServer(
+ # HTTP server
+ self.http_httpd = compat_http_server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
- self.port = http_server_port(self.httpd)
- self.server_thread = threading.Thread(target=self.httpd.serve_forever)
- self.server_thread.daemon = True
- self.server_thread.start()
+ self.http_port = http_server_port(self.http_httpd)
+
+ self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
+ self.http_server_thread.daemon = True
+ self.http_server_thread.start()
+
+ try:
+ from http.server import ThreadingHTTPServer
+ except ImportError:
+ try:
+ from socketserver import ThreadingMixIn
+ except ImportError:
+ from SocketServer import ThreadingMixIn
+
+ class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
+ pass
+
+ # HTTPS server
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ self.https_httpd = ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ try:
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ sslctx.verify_mode = ssl.CERT_NONE
+ sslctx.check_hostname = False
+ sslctx.load_cert_chain(certfn, None)
+ self.https_httpd.socket = sslctx.wrap_socket(
+ self.https_httpd.socket, server_side=True)
+ except AttributeError:
+ self.https_httpd.socket = ssl.wrap_socket(
+ self.https_httpd.socket, certfile=certfn, server_side=True)
+
+ self.https_port = http_server_port(self.https_httpd)
+ self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
+ self.https_server_thread.daemon = True
+ self.https_server_thread.start()
+
+ def tearDown(self):
+
+ def closer(svr):
+ def _closer():
+ svr.shutdown()
+ svr.server_close()
+ return _closer
+
+ shutdown_thread = threading.Thread(target=closer(self.http_httpd))
+ shutdown_thread.start()
+ self.http_server_thread.join(2.0)
+
+ shutdown_thread = threading.Thread(target=closer(self.https_httpd))
+ shutdown_thread.start()
+ self.https_server_thread.join(2.0)
+
+ def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
+ return '{0}://{1}:{2}/{3}'.format(
+ scheme, host,
+ port if port is not None
+ else self.https_port if scheme == 'https'
+ else self.http_port, path)
+
+ @unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')
+ def test_nocheckcertificate(self):
+ with FakeYDL({'logger': FakeLogger()}) as ydl:
+ with self.assertRaises(compat_urllib_error.URLError):
+ ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
+
+ with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
+ r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
+ self.assertEqual(r.getcode(), 200)
+ r.close()
+
+ def test_percent_encode(self):
+ with FakeYDL() as ydl:
+ # Unicode characters should be encoded with uppercase percent-encoding
+ res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
+ self.assertEqual(res.getcode(), 200)
+ res.close()
+ # don't normalize existing percent encodings
+ res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
+ self.assertEqual(res.getcode(), 200)
+ res.close()
def test_unicode_path_redirection(self):
- # XXX: Python 3 http server does not allow non-ASCII header values
- if sys.version_info[0] == 3:
- return
+ with FakeYDL() as ydl:
+ r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
+ self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
+ r.close()
- ydl = YoutubeDL({'logger': FakeLogger()})
- r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
+ def test_redirect(self):
+ with FakeYDL() as ydl:
+ def do_req(redirect_status, method, check_no_content=False):
+ data = b'testdata' if method in ('POST', 'PUT') else None
+ res = ydl.urlopen(sanitized_Request(
+ self._test_url('redirect_{0}'.format(redirect_status)),
+ method=method, data=data))
+ if check_no_content:
+ self.assertNotIn('Content-Type', res.headers)
+ return res.read().decode('utf-8'), res.headers.get('method', '')
+ # A 303 must either use GET or HEAD for subsequent request
+ self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
+ self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
+ self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
-class TestHTTPS(unittest.TestCase):
- def setUp(self):
- certfn = os.path.join(TEST_DIR, 'testcert.pem')
- self.httpd = compat_http_server.HTTPServer(
- ('127.0.0.1', 0), HTTPTestRequestHandler)
- self.httpd.socket = ssl.wrap_socket(
- self.httpd.socket, certfile=certfn, server_side=True)
- self.port = http_server_port(self.httpd)
- self.server_thread = threading.Thread(target=self.httpd.serve_forever)
- self.server_thread.daemon = True
- self.server_thread.start()
+ # 301 and 302 turn POST only into a GET, with no Content-Type
+ self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
+ self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
+ self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
+ self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
- def test_nocheckcertificate(self):
- if sys.version_info >= (2, 7, 9): # No certificate checking anyways
- ydl = YoutubeDL({'logger': FakeLogger()})
- self.assertRaises(
- Exception,
- ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+ self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
+ self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
- ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
- r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
- self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+ # 307 and 308 should not change method
+ for m in ('POST', 'PUT'):
+ self.assertEqual(do_req(307, m), ('testdata', m))
+ self.assertEqual(do_req(308, m), ('testdata', m))
+
+ self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
+ self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
+
+ # These should not redirect and instead raise an HTTPError
+ for code in (300, 304, 305, 306):
+ with self.assertRaises(compat_urllib_HTTPError):
+ do_req(code, 'GET')
+
+ # Jython 2.7.1 times out for some reason
+ @expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))
+ def test_content_type(self):
+ # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
+ with FakeYDL({'nocheckcertificate': True}) as ydl:
+ # method should be auto-detected as POST
+ r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
+
+ headers = ydl.urlopen(r).read().decode('utf-8')
+ self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+ # test http
+ r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
+ headers = ydl.urlopen(r).read().decode('utf-8')
+ self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+ def test_update_req(self):
+ req = sanitized_Request('http://example.com')
+ assert req.data is None
+ assert req.get_method() == 'GET'
+ assert not req.has_header('Content-Type')
+ # Test that zero-byte payloads will be sent
+ req = update_Request(req, data=b'')
+ assert req.data == b''
+ assert req.get_method() == 'POST'
+ # yt-dl expects data to be encoded and Content-Type to be added by sender
+ # assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'
+
+ def test_cookiejar(self):
+ with FakeYDL() as ydl:
+ ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+ 0, 'test', 'ytdl', None, False, '127.0.0.1', True,
+ False, '/headers', True, False, None, False, None, None, {}))
+ data = ydl.urlopen(sanitized_Request(
+ self._test_url('headers'))).read().decode('utf-8')
+ self.assertIn('Cookie: test=ytdl', data)
+
+ def test_passed_cookie_header(self):
+ # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+ with FakeYDL() as ydl:
+ # Specified Cookie header should be used
+ res = ydl.urlopen(sanitized_Request(
+ self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+ self.assertIn('Cookie: test=test', res)
+
+ # Specified Cookie header should be removed on any redirect
+ res = ydl.urlopen(sanitized_Request(
+ self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+ self.assertNotIn('Cookie: test=test', res)
+
+ # Specified Cookie header should override global cookiejar for that request
+ ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
+ 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+ False, '/headers', True, False, None, False, None, None, {}))
+ data = ydl.urlopen(sanitized_Request(
+ self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
+ self.assertNotIn('Cookie: test=ytdlp', data)
+ self.assertIn('Cookie: test=test', data)
+
+ def test_no_compression_compat_header(self):
+ with FakeYDL() as ydl:
+ data = ydl.urlopen(
+ sanitized_Request(
+ self._test_url('headers'),
+ headers={'Youtubedl-no-compression': True})).read()
+ self.assertIn(b'Accept-Encoding: identity', data)
+ self.assertNotIn(b'youtubedl-no-compression', data.lower())
+
+ def test_gzip_trailing_garbage(self):
+ # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
+ # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
+ with FakeYDL() as ydl:
+ data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
+ self.assertEqual(data, '')
+
+ def __test_compression(self, encoding):
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ self._test_url('content-encoding'),
+ headers={'ytdl-encoding': encoding}))
+ # decoded encodings are removed: only check for valid decompressed data
+ self.assertEqual(res.read(), b'')
+
+ @unittest.skipUnless(brotli, 'brotli support is not installed')
+ def test_brotli(self):
+ self.__test_compression('br')
+
+ def test_deflate(self):
+ self.__test_compression('deflate')
+
+ def test_gzip(self):
+ self.__test_compression('gzip')
+
+ def test_multiple_encodings(self):
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
+ for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+ self.__test_compression(pair)
+
+ def test_unsupported_encoding(self):
+ # it should return the raw content
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ self._test_url('content-encoding'),
+ headers={'ytdl-encoding': 'unsupported'}))
+ self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
+ self.assertEqual(res.read(), b'raw')
+
+ def test_remove_dot_segments(self):
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))
+ self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
+
+ res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))
+ self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
def _build_proxy_handler(name):
@@ -117,7 +515,7 @@ def _build_proxy_handler(name):
self.send_response(200)
self.send_header('Content-Type', 'text/plain; charset=utf-8')
self.end_headers()
- self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+ self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
return HTTPTestRequestHandler
@@ -137,10 +535,30 @@ class TestProxy(unittest.TestCase):
self.geo_proxy_thread.daemon = True
self.geo_proxy_thread.start()
+ def tearDown(self):
+
+ def closer(svr):
+ def _closer():
+ svr.shutdown()
+ svr.server_close()
+ return _closer
+
+ shutdown_thread = threading.Thread(target=closer(self.proxy))
+ shutdown_thread.start()
+ self.proxy_thread.join(2.0)
+
+ shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
+ shutdown_thread.start()
+ self.geo_proxy_thread.join(2.0)
+
+ def _test_proxy(self, host='127.0.0.1', port=None):
+ return '{0}:{1}'.format(
+ host, port if port is not None else self.port)
+
def test_proxy(self):
- geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+ geo_proxy = self._test_proxy(port=self.geo_port)
ydl = YoutubeDL({
- 'proxy': '127.0.0.1:{0}'.format(self.port),
+ 'proxy': self._test_proxy(),
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
@@ -154,7 +572,7 @@ class TestProxy(unittest.TestCase):
def test_proxy_with_idn(self):
ydl = YoutubeDL({
- 'proxy': '127.0.0.1:{0}'.format(self.port),
+ 'proxy': self._test_proxy(),
})
url = 'http://中文.tw/'
response = ydl.urlopen(url).read().decode('utf-8')
@@ -162,5 +580,25 @@ class TestProxy(unittest.TestCase):
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+class TestFileURL(unittest.TestCase):
+ # See https://github.com/ytdl-org/youtube-dl/issues/8227
+ def test_file_urls(self):
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ tf.write(b'foobar')
+ tf.close()
+ url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
+ with FakeYDL() as ydl:
+ self.assertRaisesRegexp(
+ compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
+ # not yet implemented
+ """
+ with FakeYDL({'enable_file_urls': True}) as ydl:
+ res = ydl.urlopen(url)
+ self.assertEqual(res.read(), b'foobar')
+ res.close()
+ """
+ os.unlink(tf.name)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c24b8ca74..c7a4f2cbf 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -8,109 +8,450 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.jsinterp import JSInterpreter
+import math
+import re
+
+from youtube_dl.compat import compat_str
+from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
+
+NaN = object()
class TestJSInterpreter(unittest.TestCase):
+ def _test(self, jsi_or_code, expected, func='f', args=()):
+ if isinstance(jsi_or_code, compat_str):
+ jsi_or_code = JSInterpreter(jsi_or_code)
+ got = jsi_or_code.call_function(func, *args)
+ if expected is NaN:
+ self.assertTrue(math.isnan(got), '{0} is not NaN'.format(got))
+ else:
+ self.assertEqual(got, expected)
+
def test_basic(self):
- jsi = JSInterpreter('function x(){;}')
- self.assertEqual(jsi.call_function('x'), None)
+ jsi = JSInterpreter('function f(){;}')
+ self.assertEqual(repr(jsi.extract_function('f')), 'F')
+ self._test(jsi, None)
- jsi = JSInterpreter('function x3(){return 42;}')
- self.assertEqual(jsi.call_function('x3'), 42)
+ self._test('function f(){return 42;}', 42)
+ self._test('function f(){42}', None)
+ self._test('var f = function(){return 42;}', 42)
- jsi = JSInterpreter('var x5 = function(){return 42;}')
- self.assertEqual(jsi.call_function('x5'), 42)
+ def test_add(self):
+ self._test('function f(){return 42 + 7;}', 49)
+ self._test('function f(){return 42 + undefined;}', NaN)
+ self._test('function f(){return 42 + null;}', 42)
+
+ def test_sub(self):
+ self._test('function f(){return 42 - 7;}', 35)
+ self._test('function f(){return 42 - undefined;}', NaN)
+ self._test('function f(){return 42 - null;}', 42)
+
+ def test_mul(self):
+ self._test('function f(){return 42 * 7;}', 294)
+ self._test('function f(){return 42 * undefined;}', NaN)
+ self._test('function f(){return 42 * null;}', 0)
+
+ def test_div(self):
+ jsi = JSInterpreter('function f(a, b){return a / b;}')
+ self._test(jsi, NaN, args=(0, 0))
+ self._test(jsi, NaN, args=(JS_Undefined, 1))
+ self._test(jsi, float('inf'), args=(2, 0))
+ self._test(jsi, 0, args=(0, 3))
+
+ def test_mod(self):
+ self._test('function f(){return 42 % 7;}', 0)
+ self._test('function f(){return 42 % 0;}', NaN)
+ self._test('function f(){return 42 % undefined;}', NaN)
+
+ def test_exp(self):
+ self._test('function f(){return 42 ** 2;}', 1764)
+ self._test('function f(){return 42 ** undefined;}', NaN)
+ self._test('function f(){return 42 ** null;}', 1)
+ self._test('function f(){return undefined ** 42;}', NaN)
def test_calc(self):
- jsi = JSInterpreter('function x4(a){return 2*a+1;}')
- self.assertEqual(jsi.call_function('x4', 3), 7)
+ self._test('function f(a){return 2*a+1;}', 7, args=[3])
def test_empty_return(self):
- jsi = JSInterpreter('function f(){return; y()}')
- self.assertEqual(jsi.call_function('f'), None)
+ self._test('function f(){return; y()}', None)
def test_morespace(self):
- jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
- self.assertEqual(jsi.call_function('x', 3), 7)
-
- jsi = JSInterpreter('function f () { x = 2 ; return x; }')
- self.assertEqual(jsi.call_function('f'), 2)
+ self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
+ self._test('function f () { x = 2 ; return x; }', 2)
def test_strange_chars(self):
- jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
- self.assertEqual(jsi.call_function('$_xY1', 20), 21)
+ self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
+ 21, args=[20], func='$_xY1')
def test_operators(self):
- jsi = JSInterpreter('function f(){return 1 << 5;}')
- self.assertEqual(jsi.call_function('f'), 32)
-
- jsi = JSInterpreter('function f(){return 19 & 21;}')
- self.assertEqual(jsi.call_function('f'), 17)
-
- jsi = JSInterpreter('function f(){return 11 >> 2;}')
- self.assertEqual(jsi.call_function('f'), 2)
+ self._test('function f(){return 1 << 5;}', 32)
+ self._test('function f(){return 2 ** 5}', 32)
+ self._test('function f(){return 19 & 21;}', 17)
+ self._test('function f(){return 11 >> 2;}', 2)
+ self._test('function f(){return []? 2+3: 4;}', 5)
+ self._test('function f(){return 1 == 2}', False)
+ self._test('function f(){return 0 && 1 || 2;}', 2)
+ self._test('function f(){return 0 ?? 42;}', 0)
+ self._test('function f(){return "life, the universe and everything" < 42;}', False)
+ # https://github.com/ytdl-org/youtube-dl/issues/32815
+ self._test('function f(){return 0 - 7 * - 6;}', 42)
def test_array_access(self):
- jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
- self.assertEqual(jsi.call_function('f'), [5, 2, 7])
+ self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
def test_parens(self):
- jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
- self.assertEqual(jsi.call_function('f'), 7)
+ self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
+ self._test('function f(){return (1 + 2) * 3;}', 9)
- jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
- self.assertEqual(jsi.call_function('f'), 9)
+ def test_quotes(self):
+ self._test(r'function f(){return "a\"\\("}', r'a"\(')
def test_assignments(self):
- jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
- self.assertEqual(jsi.call_function('f'), 31)
-
- jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
- self.assertEqual(jsi.call_function('f'), 51)
-
- jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
- self.assertEqual(jsi.call_function('f'), -11)
+ self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
+ self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
+ self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
+ @unittest.skip('Not yet fully implemented')
def test_comments(self):
- 'Skipping: Not yet fully implemented'
- return
- jsi = JSInterpreter('''
- function x() {
- var x = /* 1 + */ 2;
- var y = /* 30
- * 40 */ 50;
- return x + y;
- }
- ''')
- self.assertEqual(jsi.call_function('x'), 52)
+ self._test('''
+ function f() {
+ var x = /* 1 + */ 2;
+ var y = /* 30
+ * 40 */ 50;
+ return x + y;
+ }
+ ''', 52)
- jsi = JSInterpreter('''
- function f() {
- var x = "/*";
- var y = 1 /* comment */ + 2;
- return y;
- }
- ''')
- self.assertEqual(jsi.call_function('f'), 3)
+ self._test('''
+ function f() {
+ var x = "/*";
+ var y = 1 /* comment */ + 2;
+ return y;
+ }
+ ''', 3)
def test_precedence(self):
- jsi = JSInterpreter('''
- function x() {
- var a = [10, 20, 30, 40, 50];
- var b = 6;
- a[0]=a[b%a.length];
- return a;
- }''')
- self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
+ self._test('''
+ function f() {
+ var a = [10, 20, 30, 40, 50];
+ var b = 6;
+ a[0]=a[b%a.length];
+ return a;
+ }
+ ''', [20, 20, 30, 40, 50])
+
+ def test_builtins(self):
+ self._test('function f() { return NaN }', NaN)
+
+ def test_Date(self):
+ self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
+
+ jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
+ # date format m/d/y
+ self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
+ # epoch 0
+ self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
def test_call(self):
jsi = JSInterpreter('''
function x() { return 2; }
- function y(a) { return x() + a; }
+ function y(a) { return x() + (a?a:0); }
function z() { return y(3); }
''')
- self.assertEqual(jsi.call_function('z'), 5)
+ self._test(jsi, 5, func='z')
+ self._test(jsi, 2, func='y')
+
+ def test_if(self):
+ self._test('''
+ function f() {
+ let a = 9;
+ if (0==0) {a++}
+ return a
+ }
+ ''', 10)
+
+ self._test('''
+ function f() {
+ if (0==0) {return 10}
+ }
+ ''', 10)
+
+ self._test('''
+ function f() {
+ if (0!=0) {return 1}
+ else {return 10}
+ }
+ ''', 10)
+
+ def test_elseif(self):
+ self._test('''
+ function f() {
+ if (0!=0) {return 1}
+ else if (1==0) {return 2}
+ else {return 10}
+ }
+ ''', 10)
+
+ def test_for_loop(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
+
+ def test_while_loop(self):
+ self._test('function f() { a=0; while (a<10) {a++} return a }', 10)
+
+ def test_switch(self):
+ jsi = JSInterpreter('''
+ function f(x) { switch(x){
+ case 1:x+=1;
+ case 2:x+=2;
+ case 3:x+=3;break;
+ case 4:x+=4;
+ default:x=0;
+ } return x }
+ ''')
+ self._test(jsi, 7, args=[1])
+ self._test(jsi, 6, args=[3])
+ self._test(jsi, 0, args=[5])
+
+ def test_switch_default(self):
+ jsi = JSInterpreter('''
+ function f(x) { switch(x){
+ case 2: x+=2;
+ default: x-=1;
+ case 5:
+ case 6: x+=6;
+ case 0: break;
+ case 1: x+=1;
+ } return x }
+ ''')
+ self._test(jsi, 2, args=[1])
+ self._test(jsi, 11, args=[5])
+ self._test(jsi, 14, args=[9])
+
+ def test_try(self):
+ self._test('function f() { try{return 10} catch(e){return 5} }', 10)
+
+ def test_catch(self):
+ self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
+
+ def test_finally(self):
+ self._test('function f() { try{throw 10} finally {return 42} }', 42)
+ self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
+
+ def test_nested_try(self):
+ self._test('''
+ function f() {try {
+ try{throw 10} finally {throw 42}
+ } catch(e){return 5} }
+ ''', 5)
+
+ def test_for_loop_continue(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
+
+ def test_for_loop_break(self):
+ self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
+
+ def test_for_loop_try(self):
+ self._test('''
+ function f() {
+ for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+ return 42 }
+ ''', 42)
+
+ def test_literal_list(self):
+ self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
+
+ def test_comma(self):
+ self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
+ self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
+ self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
+
+ def test_void(self):
+ self._test('function f() { return void 42; }', None)
+
+ def test_return_function(self):
+ jsi = JSInterpreter('''
+ function x() { return [1, function(){return 1}][1] }
+ ''')
+ self.assertEqual(jsi.call_function('x')([]), 1)
+
+ def test_null(self):
+ self._test('function f() { return null; }', None)
+ self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
+ [False, False, False, False])
+ self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
+
+ def test_undefined(self):
+ self._test('function f() { return undefined === undefined; }', True)
+ self._test('function f() { return undefined; }', JS_Undefined)
+ self._test('function f() {return undefined ?? 42; }', 42)
+ self._test('function f() { let v; return v; }', JS_Undefined)
+ self._test('function f() { let v; return v**0; }', 1)
+ self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
+ [False, False, JS_Undefined, JS_Undefined])
+
+ self._test('''
+ function f() { return [
+ undefined === undefined,
+ undefined == undefined,
+ undefined == null
+ ]; }
+ ''', [True] * 3)
+ self._test('''
+ function f() { return [
+ undefined < undefined,
+ undefined > undefined,
+ undefined === 0,
+ undefined == 0,
+ undefined < 0,
+ undefined > 0,
+ undefined >= 0,
+ undefined <= 0,
+ undefined > null,
+ undefined < null,
+ undefined === null
+ ]; }
+ ''', [False] * 11)
+
+ jsi = JSInterpreter('''
+ function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
+ ''')
+ for y in jsi.call_function('x'):
+ self.assertTrue(math.isnan(y))
+
+ def test_object(self):
+ self._test('function f() { return {}; }', {})
+ self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
+ self._test('function f() { let a; return a?.qq; }', JS_Undefined)
+ self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
+
+ def test_regex(self):
+ self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
+
+ jsi = JSInterpreter('''
+ function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
+ ''')
+ attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
+ 'split', 'sub', 'subn'))
+ if sys.version_info >= (2, 7):
+ # documented for 2.6 but may not be found
+ attrs.update(('flags', 'groupindex', 'groups', 'pattern'))
+ self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
+
+ jsi = JSInterpreter('''
+ function x() { let a=/,,[/,913,/](,)}/i; return a; }
+ ''')
+ self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
+
+ jsi = JSInterpreter(r'function f() { let a=/,][}",],()}(\[)/; return a; }')
+ self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
+
+ jsi = JSInterpreter(r'function f() { let a=[/[)\\]/]; return a[0]; }')
+ self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
+
+ def test_replace(self):
+ self._test('function f() { let a="data-name".replace("data-", ""); return a }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
+ 'name')
+ self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
+ 'doto-nome')
+ self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
+ 'doto-nome')
+
+ def test_char_code_at(self):
+ jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
+ self._test(jsi, 116, args=[0])
+ self._test(jsi, 101, args=[1])
+ self._test(jsi, 115, args=[2])
+ self._test(jsi, 116, args=[3])
+ self._test(jsi, None, args=[4])
+ self._test(jsi, 116, args=['not_a_number'])
+
+ def test_bitwise_operators_overflow(self):
+ self._test('function f(){return -524999584 << 5}', 379882496)
+ self._test('function f(){return 1236566549 << 5}', 915423904)
+
+ def test_bitwise_operators_typecast(self):
+ # madness
+ self._test('function f(){return null << 5}', 0)
+ self._test('function f(){return undefined >> 5}', 0)
+ self._test('function f(){return 42 << NaN}', 42)
+ self._test('function f(){return 42 << Infinity}', 42)
+
+ def test_negative(self):
+ self._test('function f(){return 2 * -2.0 ;}', -4)
+ self._test('function f(){return 2 - - -2 ;}', 0)
+ self._test('function f(){return 2 - - - -2 ;}', 4)
+ self._test('function f(){return 2 - + + - -2;}', 0)
+ self._test('function f(){return 2 + - + - -2;}', 0)
+
+ def test_32066(self):
+ self._test(
+ "function f(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}",
+ 70)
+
+ @unittest.skip('Not yet working')
+ def test_packed(self):
+ self._test(
+ '''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''',
+ '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))
+
+ def test_join(self):
+ test_input = list('test')
+ tests = [
+ 'function f(a, b){return a.join(b)}',
+ 'function f(a, b){return Array.prototype.join.call(a, b)}',
+ 'function f(a, b){return Array.prototype.join.apply(a, [b])}',
+ ]
+ for test in tests:
+ jsi = JSInterpreter(test)
+ self._test(jsi, 'test', args=[test_input, ''])
+ self._test(jsi, 't-e-s-t', args=[test_input, '-'])
+ self._test(jsi, '', args=[[], '-'])
+
+ def test_split(self):
+ test_result = list('test')
+ tests = [
+ 'function f(a, b){return a.split(b)}',
+ 'function f(a, b){return String.prototype.split.call(a, b)}',
+ 'function f(a, b){return String.prototype.split.apply(a, [b])}',
+ ]
+ for test in tests:
+ jsi = JSInterpreter(test)
+ self._test(jsi, test_result, args=['test', ''])
+ self._test(jsi, test_result, args=['t-e-s-t', '-'])
+ self._test(jsi, [''], args=['', '-'])
+ self._test(jsi, [], args=['', ''])
+
+ def test_slice(self):
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
+ self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
+ self._test('function f(){return "012345678".slice()}', '012345678')
+ self._test('function f(){return "012345678".slice(0)}', '012345678')
+ self._test('function f(){return "012345678".slice(5)}', '5678')
+ self._test('function f(){return "012345678".slice(99)}', '')
+ self._test('function f(){return "012345678".slice(-2)}', '78')
+ self._test('function f(){return "012345678".slice(-99)}', '012345678')
+ self._test('function f(){return "012345678".slice(0, 0)}', '')
+ self._test('function f(){return "012345678".slice(1, 0)}', '')
+ self._test('function f(){return "012345678".slice(0, 1)}', '0')
+ self._test('function f(){return "012345678".slice(3, 6)}', '345')
+ self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
+ self._test('function f(){return "012345678".slice(-1, 1)}', '')
+ self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__':
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 550e0ca00..e005c78fc 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
self.DL = FakeYDL()
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
+ if not self.IE.working():
+ print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
+ self.skipTest('IE marked as not _WORKING')
def getInfoDict(self):
info_dict = self.DL.extract_info(self.url, download=False)
@@ -56,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase):
class TestYoutubeSubtitles(BaseTestSubtitles):
+ # Available subtitles for QRS8MkLhQmM:
+ # Language formats
+ # ru vtt, ttml, srv3, srv2, srv1, json3
+ # fr vtt, ttml, srv3, srv2, srv1, json3
+ # en vtt, ttml, srv3, srv2, srv1, json3
+ # nl vtt, ttml, srv3, srv2, srv1, json3
+ # de vtt, ttml, srv3, srv2, srv1, json3
+ # ko vtt, ttml, srv3, srv2, srv1, json3
+ # it vtt, ttml, srv3, srv2, srv1, json3
+ # zh-Hant vtt, ttml, srv3, srv2, srv1, json3
+ # hi vtt, ttml, srv3, srv2, srv1, json3
+ # pt-BR vtt, ttml, srv3, srv2, srv1, json3
+ # es-MX vtt, ttml, srv3, srv2, srv1, json3
+ # ja vtt, ttml, srv3, srv2, srv1, json3
+ # pl vtt, ttml, srv3, srv2, srv1, json3
url = 'QRS8MkLhQmM'
IE = YoutubeIE
@@ -64,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
- self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
- def test_youtube_subtitles_ttml_format(self):
+ def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'ttml'
+ self.DL.params['subtitlesformat'] = fmt
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
+ self.assertEqual(md5(subtitles[lang]), md5_hash)
+
+ def test_youtube_subtitles_ttml_format(self):
+ self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'vtt'
+ self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
+
+ def test_youtube_subtitles_json3_format(self):
+ self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
+
+ def _test_automatic_captions(self, url, lang):
+ self.url = url
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = [lang]
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+ self.assertTrue(subtitles[lang] is not None)
def test_youtube_automatic_captions(self):
- self.url = '8YoUxe5ncPo'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ # Available automatic captions for 8YoUxe5ncPo:
+ # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
+ # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
+ # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
+ # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
+ # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
+ # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
+ # mt, ms, mr, ug, ta, my, af, sw, is, am,
+ # *it*, iw, sv, ar,
+ # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
+ # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
+ # ky, sd
+ # ...
+ self._test_automatic_captions('8YoUxe5ncPo', 'it')
+ @unittest.skip('ASR subs all in all supported langs now')
def test_youtube_translated_subtitles(self):
- # This video has a subtitles track, which can be translated
- self.url = 'Ky9eprVWzlI'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ # This video has a subtitles track, which can be translated (#4555)
+ self._test_automatic_captions('Ky9eprVWzlI', 'it')
def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'n5BB19UTcdA'
+ # Available automatic captions for 8YoUxe5ncPo:
+ # ...
+ # 8YoUxe5ncPo has no subtitles
+ self.url = '8YoUxe5ncPo'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@@ -128,6 +165,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
@@ -152,18 +190,19 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
- self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
- self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+ self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
+ self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'http://vimeo.com/56015672'
+ self.url = 'http://vimeo.com/68093876'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertFalse(subtitles)
+@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
@@ -185,6 +224,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
@@ -206,6 +246,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.assertFalse(subtitles)
+@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
@@ -218,6 +259,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
@@ -230,6 +272,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
@@ -252,9 +295,10 @@ class TestNRKSubtitles(BaseTestSubtitles):
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
+ self.DL.params['format'] = 'best/bestvideo'
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['no']))
- self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+ self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
+ self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
class TestRaiPlaySubtitles(BaseTestSubtitles):
@@ -277,6 +321,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
+@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
@@ -303,6 +348,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
@@ -338,7 +384,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
def test_subtitles_in_page(self):
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@@ -346,7 +392,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
if __name__ == '__main__':
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index 9f18055e6..7c282ee00 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -5,16 +5,18 @@ from __future__ import unicode_literals
import os
import sys
import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+dirn = os.path.dirname
+
+sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
import errno
-import io
import json
import re
import subprocess
from youtube_dl.swfinterp import SWFInterpreter
+from youtube_dl.compat import compat_open as open
TEST_DIR = os.path.join(
@@ -43,7 +45,7 @@ def _make_testfunc(testfile):
'-static-link-runtime-shared-libraries', as_file])
except OSError as ose:
if ose.errno == errno.ENOENT:
- print('mxmlc not found! Skipping test.')
+ self.skipTest('mxmlc not found!')
return
raise
@@ -51,7 +53,7 @@ def _make_testfunc(testfile):
swf_content = swf_f.read()
swfi = SWFInterpreter(swf_content)
- with io.open(as_file, 'r', encoding='utf-8') as as_f:
+ with open(as_file, 'r', encoding='utf-8') as as_f:
as_content = as_f.read()
def _find_spec(key):
diff --git a/test/test_traversal.py b/test/test_traversal.py
new file mode 100644
index 000000000..00a428edb
--- /dev/null
+++ b/test/test_traversal.py
@@ -0,0 +1,509 @@
+#!/usr/bin/env python
+# coding: utf-8
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import re
+
+from youtube_dl.traversal import (
+ dict_get,
+ get_first,
+ T,
+ traverse_obj,
+)
+from youtube_dl.compat import (
+ compat_etree_fromstring,
+ compat_http_cookies,
+ compat_str,
+)
+from youtube_dl.utils import (
+ int_or_none,
+ str_or_none,
+)
+
+_TEST_DATA = {
+ 100: 100,
+ 1.2: 1.2,
+ 'str': 'str',
+ 'None': None,
+ '...': Ellipsis,
+ 'urls': [
+ {'index': 0, 'url': 'https://www.example.com/0'},
+ {'index': 1, 'url': 'https://www.example.com/1'},
+ ],
+ 'data': (
+ {'index': 2},
+ {'index': 3},
+ ),
+ 'dict': {},
+}
+
+
+if sys.version_info < (3, 0):
+ class _TestCase(unittest.TestCase):
+
+ def assertCountEqual(self, *args, **kwargs):
+ return self.assertItemsEqual(*args, **kwargs)
+else:
+ _TestCase = unittest.TestCase
+
+
+class TestTraversal(_TestCase):
+ def assertMaybeCountEqual(self, *args, **kwargs):
+ if sys.version_info < (3, 7):
+ # random dict order
+ return self.assertCountEqual(*args, **kwargs)
+ else:
+ return self.assertEqual(*args, **kwargs)
+
+ def test_traverse_obj(self):
+ # instant compat
+ str = compat_str
+
+ # define a pukka Iterable
+ def iter_range(stop):
+ for from_ in range(stop):
+ yield from_
+
+ # Test base functionality
+ self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+ msg='allow tuple path')
+ self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+ msg='allow list path')
+ self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+ msg='allow iterable path')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+ msg='single items should be treated as a path')
+ self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+ self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+ self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+ # Test Ellipsis behavior
+ self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
+ (item for item in _TEST_DATA.values() if item not in (None, {})),
+ msg='`...` should give all non-discarded values')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
+ msg='`...` selection for dicts should select all values')
+ self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='nested `...` queries should work')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
+ msg='`...` query result should be flattened')
+ self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
+ msg='`...` should accept iterables')
+
+ # Test function as key
+ self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+ [_TEST_DATA['urls']],
+ msg='function as query key should perform a filter based on (key, value)')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
+ msg='exceptions in the query function should be caught')
+ self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
+ msg='function key should accept iterables')
+ if __debug__:
+ with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+ traverse_obj(_TEST_DATA, lambda a: Ellipsis)
+ with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+ traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
+
+ # Test set as key (transformation/type, like `expected_type`)
+ self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
+ msg='Function in set should be a transformation')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const',
+ msg='Function in set should always be called')
+ self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
+ msg='Type in set should be a type filter')
+ self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'],
+ msg='Multiple types in set should be a type filter')
+ self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
+ msg='A single set should be wrapped into a path')
+ self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
+ msg='Transformation function should not raise')
+ self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
+ [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
+ msg='Function in set should be a transformation')
+ if __debug__:
+ with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+ traverse_obj(_TEST_DATA, set())
+ with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+ traverse_obj(_TEST_DATA, set((str.upper, str)))
+
+ # Test `slice` as a key
+ _SLICE_DATA = [0, 1, 2, 3, 4]
+ self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+ msg='slice on a dictionary should not throw')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+ msg='slice key should apply slice to sequence')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+ msg='slice key should apply slice to sequence')
+ self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+ msg='slice key should apply slice to sequence')
+
+ # Test alternative paths
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+ msg='multiple `paths` should be treated as alternative paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+ msg='alternatives should exit early')
+ self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+ msg='alternatives should return `default` if exhausted')
+ self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
+ msg='alternatives should track their own branching return')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
+ msg='alternatives on empty objects should search further')
+
+ # Test branch and path nesting
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+ msg='tuple as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+ msg='list as key should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+ msg='double nesting in path should be treated as paths')
+ self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+ msg='do not fail early on branching')
+ self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='triple nesting in path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
+ ['https://www.example.com/0', 'https://www.example.com/1'],
+ msg='ellipsis as branch path start gets flattened')
+
+ # Test dictionary as key
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+ msg='dict key should result in a dict with the same keys')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+ {0: 'https://www.example.com/0'},
+ msg='dict key should allow paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+ {0: ['https://www.example.com/0']},
+ msg='tuple in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+ {0: ['https://www.example.com/0']},
+ msg='double nesting in dict path should be treated as paths')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+ {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+ msg='triple nesting in dict path should be treated as branches')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+ msg='remove `None` values when top level dict key fails')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+ msg='use `default` if key fails and `default`')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+ msg='remove empty values when dict key')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
+ msg='use `default` when dict key and a default')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+ msg='remove empty values when nested dict key fails')
+ self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+ msg='default to dict if pruned')
+ self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+ msg='default to dict if pruned and default is given')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
+ msg='use nested `default` when nested dict key fails and `default`')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
+ msg='remove key if branch in dict key not successful')
+
+ # Testing default parameter behavior
+ _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+ msg='default value should be `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
+ msg='chained fails should result in default')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+ msg='should not short cirquit on `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+ msg='invalid dict key should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+ msg='`None` is a deliberate sentinel and should become `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+ msg='`IndexError` should result in `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
+ msg='if branched but not successful return `default` if defined, not `[]`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
+ msg='if branched but not successful return `default` even if `default` is `None`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
+ msg='if branched but not successful return `[]`, not `default`')
+ self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
+ msg='if branched but object is empty return `[]`, not `default`')
+ self.assertEqual(traverse_obj(None, Ellipsis), [],
+ msg='if branched but object is `None` return `[]`, not `default`')
+ self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
+ msg='if branched but state is `None` return `[]`, not `default`')
+
+ branching_paths = [
+ ('fail', Ellipsis),
+ (Ellipsis, 'fail'),
+ 100 * ('fail',) + (Ellipsis,),
+ (Ellipsis,) + 100 * ('fail',),
+ ]
+ for branching_path in branching_paths:
+ self.assertEqual(traverse_obj({}, branching_path), [],
+ msg='if branched but state is `None`, return `[]` (not `default`)')
+ self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+ msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+ self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+ msg='if branching in last alternative and previous did match, return single value')
+ self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+ msg='if branching in first alternative and non-branching path does match, return single value')
+ self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
+ msg='if branching in first alternative and non-branching path does not match, return `default`')
+
+ # Testing expected_type behavior
+ _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
+ 'str', msg='accept matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
+ None, msg='reject non-matching `expected_type` type')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
+ '0', msg='transform type using type function')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
+ None, msg='wrap expected_type function in try_call')
+ self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
+ ['str'], msg='eliminate items that expected_type fails on')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
+ {0: 100}, msg='type as expected_type should filter dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
+ {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
+ 1, msg='expected_type should not filter non-final dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
+ {0: {0: 100}}, msg='expected_type should transform deep dict values')
+ self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
+ [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
+ self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
+ [4], msg='expected_type regression for type matching in tuple branching')
+ self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
+ [], msg='expected_type regression for type matching in dict result')
+
+ # Test get_all behavior
+ _GET_ALL_DATA = {'key': [0, 1, 2]}
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
+ msg='if not `get_all`, return only first matching value')
+ self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
+ msg='do not overflatten if not `get_all`')
+
+ # Test casesense behavior
+ _CASESENSE_DATA = {
+ 'KeY': 'value0',
+ 0: {
+ 'KeY': 'value1',
+ 0: {'KeY': 'value2'},
+ },
+ # FULLWIDTH LATIN CAPITAL LETTER K
+ '\uff2bey': 'value3',
+ }
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
+ msg='dict keys should be case sensitive unless `casesense`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
+ casesense=False), 'value0',
+ msg='allow non matching key case if `casesense`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
+ casesense=False), 'value3',
+ msg='allow non matching Unicode key case if `casesense`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
+ casesense=False), ['value1'],
+ msg='allow non matching key case in branch if `casesense`')
+ self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
+ casesense=False), ['value2'],
+ msg='allow non matching key case in branch path if `casesense`')
+
+ # Test traverse_string behavior
+ _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
+ msg='do not traverse into string if not `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
+ _traverse_string=True), 's',
+ msg='traverse into string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
+ _traverse_string=True), '.',
+ msg='traverse into converted data if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
+ _traverse_string=True), 'str',
+ msg='`...` should result in string (same value) if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
+ _traverse_string=True), 'sr',
+ msg='`slice` should result in string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
+ _traverse_string=True), 'str',
+ msg='function should result in string if `traverse_string`')
+ self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
+ _traverse_string=True), ['s', 'r'],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+ self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
+ msg='branching should result in list if `traverse_string`')
+
+ # Test re.Match as input obj
+ mobj = re.match(r'^0(12)(?P3)(4)?$', '0123')
+ self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
+ msg='`...` on a `re.Match` should give its `groups()`')
+ self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+ msg='function on a `re.Match` should give groupno, value starting at 0')
+ self.assertEqual(traverse_obj(mobj, 'group'), '3',
+ msg='str key on a `re.Match` should give group with that name')
+ self.assertEqual(traverse_obj(mobj, 2), '3',
+ msg='int key on a `re.Match` should give group with that name')
+ self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+ msg='str key on a `re.Match` should respect casesense')
+ self.assertEqual(traverse_obj(mobj, 'fail'), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+ msg='failing str key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, 8), None,
+ msg='failing int key on a `re.Match` should return `default`')
+ self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
+ msg='function on a `re.Match` should give group name as well')
+
+ # Test xml.etree.ElementTree.Element as input obj
+ etree = compat_etree_fromstring('''
+
+
+ 1
+ 2008
+ 141100
+
+
+
+
+ 4
+ 2011
+ 59900
+
+
+
+ 68
+ 2011
+ 13600
+
+
+
+ ''')
+ self.assertEqual(traverse_obj(etree, ''), etree,
+ msg='empty str key should return the element itself')
+ self.assertEqual(traverse_obj(etree, 'country'), list(etree),
+ msg='str key should return all children with that tag name')
+ self.assertEqual(traverse_obj(etree, Ellipsis), list(etree),
+ msg='`...` as key should return all children')
+ self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
+ msg='function as key should get element as value')
+ self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
+ msg='function as key should get index as key')
+ self.assertEqual(traverse_obj(etree, 0), etree[0],
+ msg='int key should return the nth child')
+ self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
+ ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
+ msg='`@` at end of path should give that attribute')
+ self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
+ msg='`@` at end of path should give `None`')
+ self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
+ msg='`@` should give the full attribute dict')
+ self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
+ msg='`text()` at end of path should give the inner text')
+ self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
+ msg='full python xpath features should be supported')
+ self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
+ msg='special transformations should act on current element')
+ self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100],
+ msg='special transformations should act on current element')
+
+ def test_traversal_unbranching(self):
+ self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2],
+ msg='`all` should give all results as list')
+ self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100,
+ msg='`any` should give the first result')
+ self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100],
+ msg='`all` should give list if non branching')
+ self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100,
+ msg='`any` should give single item if non branching')
+ self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100],
+ msg='`all` should filter `None` and empty dict')
+ self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100,
+ msg='`any` should filter `None` and empty dict')
+ self.assertEqual(traverse_obj(_TEST_DATA, [{
+ 'all': [('dict', 'None', 100, 1.2), all],
+ 'any': [('dict', 'None', 100, 1.2), any],
+ }]), {'all': [100, 1.2], 'any': 100},
+ msg='`all`/`any` should apply to each dict path separately')
+ self.assertEqual(traverse_obj(_TEST_DATA, [{
+ 'all': [('dict', 'None', 100, 1.2), all],
+ 'any': [('dict', 'None', 100, 1.2), any],
+ }], get_all=False), {'all': [100, 1.2], 'any': 100},
+ msg='`all`/`any` should apply to dict regardless of `get_all`')
+ self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None,
+ msg='`all` should reset branching status')
+ self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None,
+ msg='`any` should reset branching status')
+ self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2],
+ msg='`all` should allow further branching')
+ self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1],
+ msg='`any` should allow further branching')
+
+ def test_traversal_morsel(self):
+ values = {
+ 'expires': 'a',
+ 'path': 'b',
+ 'comment': 'c',
+ 'domain': 'd',
+ 'max-age': 'e',
+ 'secure': 'f',
+ 'httponly': 'g',
+ 'version': 'h',
+ 'samesite': 'i',
+ }
+ # SameSite added in Py3.8, breaks .update for 3.5-3.7
+ if sys.version_info < (3, 8):
+ del values['samesite']
+ morsel = compat_http_cookies.Morsel()
+ morsel.set(str('item_key'), 'item_value', 'coded_value')
+ morsel.update(values)
+ values['key'] = str('item_key')
+ values['value'] = 'item_value'
+ values = dict((str(k), v) for k, v in values.items())
+ # make test pass even without ordered dict
+ value_set = set(values.values())
+
+ for key, value in values.items():
+ self.assertEqual(traverse_obj(morsel, key), value,
+ msg='Morsel should provide access to all values')
+ self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set,
+ msg='`...` should yield all values')
+ self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set,
+ msg='function key should yield all values')
+ self.assertIs(traverse_obj(morsel, [(None,), any]), morsel,
+ msg='Morsel should not be implicitly changed to dict on usage')
+
+ def test_get_first(self):
+ self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
+
+ def test_dict_get(self):
+ FALSE_VALUES = {
+ 'none': None,
+ 'false': False,
+ 'zero': 0,
+ 'empty_string': '',
+ 'empty_list': [],
+ }
+ d = FALSE_VALUES.copy()
+ d['a'] = 42
+ self.assertEqual(dict_get(d, 'a'), 42)
+ self.assertEqual(dict_get(d, 'b'), None)
+ self.assertEqual(dict_get(d, 'b', 42), 42)
+ self.assertEqual(dict_get(d, ('a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'a', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
+ self.assertEqual(dict_get(d, ('b', 'c', )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
+ for key, false_value in FALSE_VALUES.items():
+ self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
+ self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index 6c1b7ec91..0c83f2a0c 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -2,19 +2,21 @@ from __future__ import unicode_literals
# Allow direct execution
import os
+import re
import sys
import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-import io
-import re
+dirn = os.path.dirname
-rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+rootDir = dirn(dirn(os.path.abspath(__file__)))
+
+sys.path.insert(0, rootDir)
IGNORED_FILES = [
'setup.py', # http://bugs.python.org/issue13943
'conf.py',
'buildserver.py',
+ 'get-pip.py',
]
IGNORED_DIRS = [
@@ -23,6 +25,7 @@ IGNORED_DIRS = [
]
from test.helper import assertRegexpMatches
+from youtube_dl.compat import compat_open as open
class TestUnicodeLiterals(unittest.TestCase):
@@ -40,7 +43,7 @@ class TestUnicodeLiterals(unittest.TestCase):
continue
fn = os.path.join(dirpath, basename)
- with io.open(fn, encoding='utf-8') as inf:
+ with open(fn, encoding='utf-8') as inf:
code = inf.read()
if "'" not in code and '"' not in code:
diff --git a/test/test_utils.py b/test/test_utils.py
index 259c4763e..2947cce7e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -12,13 +12,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
import io
+import itertools
import json
+import types
import xml.etree.ElementTree
from youtube_dl.utils import (
+ _UnsafeExtensionError,
age_restricted,
args_to_str,
- encode_base_n,
+ base_url,
caesar,
clean_html,
clean_podcast_url,
@@ -26,11 +29,12 @@ from youtube_dl.utils import (
DateRange,
detect_exe_version,
determine_ext,
- dict_get,
+ encode_base_n,
encode_compat_str,
encodeFilename,
escape_rfc3986,
escape_url,
+ expand_path,
extract_attributes,
ExtractorError,
find_xpath_attr,
@@ -44,8 +48,11 @@ from youtube_dl.utils import (
int_or_none,
intlist_to_bytes,
is_html,
+ join_nonempty,
js_to_json,
+ LazyList,
limit_length,
+ lowercase_escape,
merge_dicts,
mimetype2ext,
month_by_name,
@@ -54,24 +61,26 @@ from youtube_dl.utils import (
OnDemandPagedList,
orderedSet,
parse_age_limit,
+ parse_bitrate,
parse_duration,
parse_filesize,
+ parse_codecs,
parse_count,
parse_iso8601,
parse_resolution,
- parse_bitrate,
+ parse_qs,
pkcs1pad,
- read_batch_urls,
- sanitize_filename,
- sanitize_path,
- sanitize_url,
- expand_path,
prepend_extension,
- replace_extension,
+ read_batch_urls,
remove_start,
remove_end,
remove_quotes,
+ replace_extension,
rot47,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ sanitized_Request,
shell_quote,
smuggle_url,
str_to_int,
@@ -79,19 +88,19 @@ from youtube_dl.utils import (
strip_or_none,
subtitles_filename,
timeconvert,
+ try_call,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
uppercase_escape,
- lowercase_escape,
url_basename,
url_or_none,
- base_url,
urljoin,
urlencode_postdata,
urshift,
update_url_query,
+ variadic,
version_tuple,
xpath_with_ns,
xpath_element,
@@ -104,7 +113,7 @@ from youtube_dl.utils import (
cli_option,
cli_valueless_option,
cli_bool_option,
- parse_codecs,
+ YoutubeDLHandler,
)
from youtube_dl.compat import (
compat_chr,
@@ -112,12 +121,13 @@ from youtube_dl.compat import (
compat_getenv,
compat_os_name,
compat_setenv,
+ compat_str,
compat_urlparse,
- compat_parse_qs,
)
class TestUtil(unittest.TestCase):
+
def test_timeconvert(self):
self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None)
@@ -236,6 +246,19 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+ self.assertEqual(sanitize_url('foo bar'), 'foo bar')
+
+ def test_sanitized_Request(self):
+ self.assertFalse(sanitized_Request('http://foo.bar').has_header('Authorization'))
+ self.assertFalse(sanitized_Request('http://:foo.bar').has_header('Authorization'))
+ self.assertEqual(sanitized_Request('http://@foo.bar').get_header('Authorization'),
+ 'Basic Og==')
+ self.assertEqual(sanitized_Request('http://:pass@foo.bar').get_header('Authorization'),
+ 'Basic OnBhc3M=')
+ self.assertEqual(sanitized_Request('http://user:@foo.bar').get_header('Authorization'),
+ 'Basic dXNlcjo=')
+ self.assertEqual(sanitized_Request('http://user:pass@foo.bar').get_header('Authorization'),
+ 'Basic dXNlcjpwYXNz')
def test_expand_path(self):
def env(var):
@@ -249,6 +272,27 @@ class TestUtil(unittest.TestCase):
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
+ _uncommon_extensions = [
+ ('exe', 'abc.exe.ext'),
+ ('de', 'abc.de.ext'),
+ ('../.mp4', None),
+ ('..\\.mp4', None),
+ ]
+
+ def assertUnsafeExtension(self, ext=None):
+ assert_raises = self.assertRaises(_UnsafeExtensionError)
+ assert_raises.ext = ext
+ orig_exit = assert_raises.__exit__
+
+ def my_exit(self_, exc_type, exc_val, exc_tb):
+ did_raise = orig_exit(exc_type, exc_val, exc_tb)
+ if did_raise and assert_raises.ext is not None:
+ self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as unexpected')
+ return did_raise
+
+ assert_raises.__exit__ = types.MethodType(my_exit, assert_raises)
+ return assert_raises
+
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -257,6 +301,19 @@ class TestUtil(unittest.TestCase):
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
+ # Test uncommon extensions
+ self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
+ for ext, result in self._uncommon_extensions:
+ with self.assertUnsafeExtension(ext):
+ prepend_extension('abc', ext)
+ if result:
+ self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
+ else:
+ with self.assertUnsafeExtension(ext):
+ prepend_extension('abc.ext', ext, 'ext')
+ with self.assertUnsafeExtension(ext):
+ prepend_extension('abc.unexpected_ext', ext, 'ext')
+
def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@@ -265,6 +322,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+ # Test uncommon extensions
+ self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
+ for ext, _ in self._uncommon_extensions:
+ with self.assertUnsafeExtension(ext):
+ replace_extension('abc', ext)
+ with self.assertUnsafeExtension(ext):
+ replace_extension('abc.ext', ext, 'ext')
+ with self.assertUnsafeExtension(ext):
+ replace_extension('abc.unexpected_ext', ext, 'ext')
+
def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
@@ -370,6 +437,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+ self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+ self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+ self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -491,11 +561,14 @@ class TestUtil(unittest.TestCase):
self.assertEqual(float_or_none(set()), None)
def test_int_or_none(self):
+ self.assertEqual(int_or_none(42), 42)
self.assertEqual(int_or_none('42'), 42)
self.assertEqual(int_or_none(''), None)
self.assertEqual(int_or_none(None), None)
self.assertEqual(int_or_none([]), None)
self.assertEqual(int_or_none(set()), None)
+ self.assertEqual(int_or_none('42', base=8), 34)
+ self.assertRaises(TypeError, int_or_none(42, base=8))
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
@@ -662,38 +735,36 @@ class TestUtil(unittest.TestCase):
self.assertTrue(isinstance(data, bytes))
def test_update_url_query(self):
- def query_dict(url):
- return compat_parse_qs(compat_urlparse.urlparse(url).query)
- self.assertEqual(query_dict(update_url_query(
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
- query_dict('http://example.com/path?quality=HD&format=mp4'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?quality=HD&format=mp4'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
- query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
- query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?manifest=f4m', {'manifest': []})),
- query_dict('http://example.com/path'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
- query_dict('http://example.com/path?system=LINUX'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?system=LINUX'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
- query_dict('http://example.com/path?fields=id,formats,subtitles'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?fields=id,formats,subtitles'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'width': 1080, 'height': 720})),
- query_dict('http://example.com/path?width=1080&height=720'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?width=1080&height=720'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'bitrate': 5020.43})),
- query_dict('http://example.com/path?bitrate=5020.43'))
- self.assertEqual(query_dict(update_url_query(
+ parse_qs('http://example.com/path?bitrate=5020.43'))
+ self.assertEqual(parse_qs(update_url_query(
'http://example.com/path', {'test': '第二行тест'})),
- query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+ parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self):
self.assertEqual(
@@ -705,28 +776,6 @@ class TestUtil(unittest.TestCase):
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
- def test_dict_get(self):
- FALSE_VALUES = {
- 'none': None,
- 'false': False,
- 'zero': 0,
- 'empty_string': '',
- 'empty_list': [],
- }
- d = FALSE_VALUES.copy()
- d['a'] = 42
- self.assertEqual(dict_get(d, 'a'), 42)
- self.assertEqual(dict_get(d, 'b'), None)
- self.assertEqual(dict_get(d, 'b', 42), 42)
- self.assertEqual(dict_get(d, ('a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', )), None)
- self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
- for key, false_value in FALSE_VALUES.items():
- self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
- self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
-
def test_merge_dicts(self):
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
@@ -885,6 +934,111 @@ class TestUtil(unittest.TestCase):
)
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+ def test_remove_dot_segments(self):
+
+ def remove_dot_segments(p):
+ q = '' if p.startswith('/') else '/'
+ p = 'http://example.com' + q + p
+ p = compat_urlparse.urlsplit(YoutubeDLHandler._fix_path(p)).path
+ return p[1:] if q else p
+
+ self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
+ self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
+ self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
+ self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
+ self.assertEqual(remove_dot_segments('/..'), '/')
+ self.assertEqual(remove_dot_segments('/./'), '/')
+ self.assertEqual(remove_dot_segments('/./a'), '/a')
+ self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
+ self.assertEqual(remove_dot_segments('/'), '/')
+ self.assertEqual(remove_dot_segments('/t'), '/t')
+ self.assertEqual(remove_dot_segments('t'), 't')
+ self.assertEqual(remove_dot_segments(''), '')
+ self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
+ self.assertEqual(remove_dot_segments('../a'), 'a')
+ self.assertEqual(remove_dot_segments('./a'), 'a')
+ self.assertEqual(remove_dot_segments('.'), '')
+ self.assertEqual(remove_dot_segments('////'), '////')
+
+ def test_js_to_json_vars_strings(self):
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'null': a,
+ 'nullStr': b,
+ 'true': c,
+ 'trueStr': d,
+ 'false': e,
+ 'falseStr': f,
+ 'unresolvedVar': g,
+ }''',
+ {
+ 'a': 'null',
+ 'b': '"null"',
+ 'c': 'true',
+ 'd': '"true"',
+ 'e': 'false',
+ 'f': '"false"',
+ 'g': 'var',
+ }
+ )),
+ {
+ 'null': None,
+ 'nullStr': 'null',
+ 'true': True,
+ 'trueStr': 'true',
+ 'false': False,
+ 'falseStr': 'false',
+ 'unresolvedVar': 'var'
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'int': a,
+ 'intStr': b,
+ 'float': c,
+ 'floatStr': d,
+ }''',
+ {
+ 'a': '123',
+ 'b': '"123"',
+ 'c': '1.23',
+ 'd': '"1.23"',
+ }
+ )),
+ {
+ 'int': 123,
+ 'intStr': '123',
+ 'float': 1.23,
+ 'floatStr': '1.23',
+ }
+ )
+
+ self.assertDictEqual(
+ json.loads(js_to_json(
+ '''{
+ 'object': a,
+ 'objectStr': b,
+ 'array': c,
+ 'arrayStr': d,
+ }''',
+ {
+ 'a': '{}',
+ 'b': '"{}"',
+ 'c': '[]',
+ 'd': '"[]"',
+ }
+ )),
+ {
+ 'object': {},
+ 'objectStr': '{}',
+ 'array': [],
+ 'arrayStr': '[]',
+ }
+ )
+
def test_js_to_json_realworld(self):
inp = '''{
'clip':{'provider':'pseudo'}
@@ -955,10 +1109,10 @@ class TestUtil(unittest.TestCase):
!42: 42
}''')
self.assertEqual(json.loads(on), {
- 'a': 0,
- 'b': 1,
- 'c': 0,
- 'd': 42.42,
+ 'a': True,
+ 'b': False,
+ 'c': False,
+ 'd': True,
'e': [],
'f': "abc",
'g': "",
@@ -1028,10 +1182,26 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})
+ on = js_to_json('[1,//{},\n2]')
+ self.assertEqual(json.loads(on), [1, 2])
+
+ on = js_to_json(r'"\^\$\#"')
+ self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
+
+ on = js_to_json('\'"\\""\'')
+ self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
+ def test_js_to_json_template_literal(self):
+ self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
+ self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
+ self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
+ self.assertEqual(js_to_json('`${name}`', {}), '"name"')
+
def test_extract_attributes(self):
self.assertEqual(extract_attributes(''), {'x': 'y'})
self.assertEqual(extract_attributes(""), {'x': 'y'})
@@ -1475,6 +1645,84 @@ Line 1
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
+ def test_LazyList(self):
+ it = list(range(10))
+
+ self.assertEqual(list(LazyList(it)), it)
+ self.assertEqual(LazyList(it).exhaust(), it)
+ self.assertEqual(LazyList(it)[5], it[5])
+
+ self.assertEqual(LazyList(it)[5:], it[5:])
+ self.assertEqual(LazyList(it)[:5], it[:5])
+ self.assertEqual(LazyList(it)[::2], it[::2])
+ self.assertEqual(LazyList(it)[1::2], it[1::2])
+ self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+ self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+ self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+ self.assertTrue(LazyList(it))
+ self.assertFalse(LazyList(range(0)))
+ self.assertEqual(len(LazyList(it)), len(it))
+ self.assertEqual(repr(LazyList(it)), repr(it))
+ self.assertEqual(compat_str(LazyList(it)), compat_str(it))
+
+ self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+ self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+ self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+ def test_LazyList_laziness(self):
+
+ def test(ll, idx, val, cache):
+ self.assertEqual(ll[idx], val)
+ self.assertEqual(ll._cache, list(cache))
+
+ ll = LazyList(range(10))
+ test(ll, 0, 0, range(1))
+ test(ll, 5, 5, range(6))
+ test(ll, -3, 7, range(10))
+
+ ll = LazyList(range(10), reverse=True)
+ test(ll, -1, 0, range(1))
+ test(ll, 3, 6, range(10))
+
+ ll = LazyList(itertools.count())
+ test(ll, 10, 10, range(11))
+ ll = reversed(ll)
+ test(ll, -15, 14, range(15))
+
+ def test_try_call(self):
+ def total(*x, **kwargs):
+ return sum(x) + sum(kwargs.values())
+
+ self.assertEqual(try_call(None), None,
+ msg='not a fn should give None')
+ self.assertEqual(try_call(lambda: 1), 1,
+ msg='int fn with no expected_type should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+ msg='int fn with expected_type int should give int')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with wrong expected_type should give None')
+ self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+ msg='fn should accept arglist')
+ self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+ msg='fn should accept kwargs')
+ self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+ msg='int fn with no expected_type should give None')
+ self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+ msg='expect first int result with expected_type int')
+
+ def test_variadic(self):
+ self.assertEqual(variadic(None), (None, ))
+ self.assertEqual(variadic('spam'), ('spam', ))
+ self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+ self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
+
+ def test_join_nonempty(self):
+ self.assertEqual(join_nonempty('a', 'b'), 'a-b')
+ self.assertEqual(join_nonempty(
+ 'a', 'b', 'c', 'd',
+ from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
index 41abdfe3b..68e0a391d 100644
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -11,12 +11,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, try_rm
-import io
-
import xml.etree.ElementTree
import youtube_dl.YoutubeDL
import youtube_dl.extractor
+from youtube_dl.compat import compat_open as open
class YoutubeDL(youtube_dl.YoutubeDL):
@@ -51,7 +50,7 @@ class TestAnnotations(unittest.TestCase):
ydl.download([TEST_ID])
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
annoxml = None
- with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+ with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
annoxml = xml.etree.ElementTree.parse(annof)
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
root = annoxml.getroot()
diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py
deleted file mode 100644
index e69c57377..000000000
--- a/test/test_youtube_chapters.py
+++ /dev/null
@@ -1,275 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import expect_value
-from youtube_dl.extractor import YoutubeIE
-
-
-class TestYoutubeChapters(unittest.TestCase):
-
- _TEST_CASES = [
- (
- # https://www.youtube.com/watch?v=A22oy8dFjqc
- # pattern: 00:00 -
- '''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***
00:36 - Bohemian Rhapsody
02:42 - Radio Ga Ga
06:53 - Ay Oh!
07:34 - Hammer To Fall
12:08 - Crazy Little Thing Called Love
16:03 - We Will Rock You
17:18 - We Are The Champions
21:12 - Is This The World We Created...?
Short song analysis:
- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).
- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!
- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!
- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!
- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.
- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!
- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!
- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.
All rights go to their respective owners!
-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
- 1477,
- [{
- 'start_time': 36,
- 'end_time': 162,
- 'title': 'Bohemian Rhapsody',
- }, {
- 'start_time': 162,
- 'end_time': 413,
- 'title': 'Radio Ga Ga',
- }, {
- 'start_time': 413,
- 'end_time': 454,
- 'title': 'Ay Oh!',
- }, {
- 'start_time': 454,
- 'end_time': 728,
- 'title': 'Hammer To Fall',
- }, {
- 'start_time': 728,
- 'end_time': 963,
- 'title': 'Crazy Little Thing Called Love',
- }, {
- 'start_time': 963,
- 'end_time': 1038,
- 'title': 'We Will Rock You',
- }, {
- 'start_time': 1038,
- 'end_time': 1272,
- 'title': 'We Are The Champions',
- }, {
- 'start_time': 1272,
- 'end_time': 1477,
- 'title': 'Is This The World We Created...?',
- }]
- ),
- (
- # https://www.youtube.com/watch?v=ekYlRhALiRQ
- # pattern: . 0:00
- '1. Those Beaten Paths of Confusion 0:00
2. Beyond the Shadows of Emptiness & Nothingness 11:47
3. Poison Yourself...With Thought 26:30
4. The Agents of Transformation 35:57
5. Drowning in the Pain of Consciousness 44:32
6. Deny the Disease of Life 53:07
More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity
No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
- 4009,
- [{
- 'start_time': 0,
- 'end_time': 707,
- 'title': '1. Those Beaten Paths of Confusion',
- }, {
- 'start_time': 707,
- 'end_time': 1590,
- 'title': '2. Beyond the Shadows of Emptiness & Nothingness',
- }, {
- 'start_time': 1590,
- 'end_time': 2157,
- 'title': '3. Poison Yourself...With Thought',
- }, {
- 'start_time': 2157,
- 'end_time': 2672,
- 'title': '4. The Agents of Transformation',
- }, {
- 'start_time': 2672,
- 'end_time': 3187,
- 'title': '5. Drowning in the Pain of Consciousness',
- }, {
- 'start_time': 3187,
- 'end_time': 4009,
- 'title': '6. Deny the Disease of Life',
- }]
- ),
- (
- # https://www.youtube.com/watch?v=WjL4pSzog9w
- # pattern: 00:00
- 'https://arizmenda.bandcamp.com/merch/...
00:00 Christening Unborn Deformities
07:08 Taste of Purity
16:16 Sculpting Sins of a Universal Tongue
24:45 Birth
31:24 Neves
37:55 Libations in Limbo',
- 2705,
- [{
- 'start_time': 0,
- 'end_time': 428,
- 'title': 'Christening Unborn Deformities',
- }, {
- 'start_time': 428,
- 'end_time': 976,
- 'title': 'Taste of Purity',
- }, {
- 'start_time': 976,
- 'end_time': 1485,
- 'title': 'Sculpting Sins of a Universal Tongue',
- }, {
- 'start_time': 1485,
- 'end_time': 1884,
- 'title': 'Birth',
- }, {
- 'start_time': 1884,
- 'end_time': 2275,
- 'title': 'Neves',
- }, {
- 'start_time': 2275,
- 'end_time': 2705,
- 'title': 'Libations in Limbo',
- }]
- ),
- (
- # https://www.youtube.com/watch?v=o3r1sn-t3is
- # pattern: 00:00
- 'Download this show in MP3: http://sh.st/njZKK
Setlist:
I-E-A-I-A-I-O 00:45
Suite-Pee 4:26 (Incomplete)
Attack 5:31 (First live performance since 2011)
Prison Song 8:42
Know 12:32 (First live performance since 2011)
Aerials 15:32
Soldier Side - Intro 19:13
B.Y.O.B. 20:09
Soil 24:32
Darts 27:48
Radio/Video 30:38
Hypnotize 35:05
Temper 38:08 (First live performance since 1999)
CUBErt 41:00
Needles 42:57
Deer Dance 46:27
Bounce 49:38
Suggestions 51:25
Psycho 53:52
Chop Suey! 58:13
Lonely Day 1:01:15
Question! 1:04:14
Lost in Hollywood 1:08:10
Vicinity of Obscenity 1:13:40(First live performance since 2012)
Forest 1:16:17
Cigaro 1:20:02
Toxicity 1:23:57(with Chino Moreno)
Sugar 1:27:53',
- 5640,
- [{
- 'start_time': 45,
- 'end_time': 266,
- 'title': 'I-E-A-I-A-I-O',
- }, {
- 'start_time': 266,
- 'end_time': 331,
- 'title': 'Suite-Pee (Incomplete)',
- }, {
- 'start_time': 331,
- 'end_time': 522,
- 'title': 'Attack (First live performance since 2011)',
- }, {
- 'start_time': 522,
- 'end_time': 752,
- 'title': 'Prison Song',
- }, {
- 'start_time': 752,
- 'end_time': 932,
- 'title': 'Know (First live performance since 2011)',
- }, {
- 'start_time': 932,
- 'end_time': 1153,
- 'title': 'Aerials',
- }, {
- 'start_time': 1153,
- 'end_time': 1209,
- 'title': 'Soldier Side - Intro',
- }, {
- 'start_time': 1209,
- 'end_time': 1472,
- 'title': 'B.Y.O.B.',
- }, {
- 'start_time': 1472,
- 'end_time': 1668,
- 'title': 'Soil',
- }, {
- 'start_time': 1668,
- 'end_time': 1838,
- 'title': 'Darts',
- }, {
- 'start_time': 1838,
- 'end_time': 2105,
- 'title': 'Radio/Video',
- }, {
- 'start_time': 2105,
- 'end_time': 2288,
- 'title': 'Hypnotize',
- }, {
- 'start_time': 2288,
- 'end_time': 2460,
- 'title': 'Temper (First live performance since 1999)',
- }, {
- 'start_time': 2460,
- 'end_time': 2577,
- 'title': 'CUBErt',
- }, {
- 'start_time': 2577,
- 'end_time': 2787,
- 'title': 'Needles',
- }, {
- 'start_time': 2787,
- 'end_time': 2978,
- 'title': 'Deer Dance',
- }, {
- 'start_time': 2978,
- 'end_time': 3085,
- 'title': 'Bounce',
- }, {
- 'start_time': 3085,
- 'end_time': 3232,
- 'title': 'Suggestions',
- }, {
- 'start_time': 3232,
- 'end_time': 3493,
- 'title': 'Psycho',
- }, {
- 'start_time': 3493,
- 'end_time': 3675,
- 'title': 'Chop Suey!',
- }, {
- 'start_time': 3675,
- 'end_time': 3854,
- 'title': 'Lonely Day',
- }, {
- 'start_time': 3854,
- 'end_time': 4090,
- 'title': 'Question!',
- }, {
- 'start_time': 4090,
- 'end_time': 4420,
- 'title': 'Lost in Hollywood',
- }, {
- 'start_time': 4420,
- 'end_time': 4577,
- 'title': 'Vicinity of Obscenity (First live performance since 2012)',
- }, {
- 'start_time': 4577,
- 'end_time': 4802,
- 'title': 'Forest',
- }, {
- 'start_time': 4802,
- 'end_time': 5037,
- 'title': 'Cigaro',
- }, {
- 'start_time': 5037,
- 'end_time': 5273,
- 'title': 'Toxicity (with Chino Moreno)',
- }, {
- 'start_time': 5273,
- 'end_time': 5640,
- 'title': 'Sugar',
- }]
- ),
- (
- # https://www.youtube.com/watch?v=PkYLQbsqCE8
- # pattern: - [] 0:00:00
- '''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.
"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions
Released on 6 panel digipak CD, limited to 100 copies only
And digital format on Bandcamp
Tracklist
1 - Во прах [Vo prakh] 0:00:00
2 - Искупление [Iskupleniye] 0:08:10
3 - Из серпов луны...[Iz serpov luny] 0:14:30
Links:
https://deathknellprod.bandcamp.com/a...
https://www.facebook.com/DeathKnellProd/
I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.
------------------------------------------------------------------
Subscribe for more videos like this.
My link: https://web.facebook.com/AttackOfTheD...''',
- 1138,
- [{
- 'start_time': 0,
- 'end_time': 490,
- 'title': '1 - Во прах [Vo prakh]',
- }, {
- 'start_time': 490,
- 'end_time': 870,
- 'title': '2 - Искупление [Iskupleniye]',
- }, {
- 'start_time': 870,
- 'end_time': 1138,
- 'title': '3 - Из серпов луны...[Iz serpov luny]',
- }]
- ),
- (
- # https://www.youtube.com/watch?v=xZW70zEasOk
- # time point more than duration
- '''● LCS Spring finals: Saturday and Sunday from 13:30 outside the venue!
● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
- 283,
- []
- ),
- ]
-
- def test_youtube_chapters(self):
- for description, duration, expected_chapters in self._TEST_CASES:
- ie = YoutubeIE()
- expect_value(
- self, ie._extract_chapters_from_description(description, duration),
- expected_chapters, None)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index c4f0abbea..e0e8891ba 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# Allow direct execution
@@ -9,10 +10,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
-
from youtube_dl.extractor import (
- YoutubePlaylistIE,
YoutubeIE,
+ YoutubePlaylistIE,
+ YoutubeTabIE,
)
@@ -24,47 +25,40 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist_noplaylist(self):
dl = FakeYDL()
dl.params['noplaylist'] = True
+ dl.params['format'] = 'best'
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertEqual(result['_type'], 'url')
+ result = dl.extract_info(result['url'], download=False, ie_key=result.get('ie_key'), process=False)
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
- def test_youtube_course(self):
- dl = FakeYDL()
- ie = YoutubePlaylistIE(dl)
- # TODO find a > 100 (paginating?) videos course
- result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
- entries = list(result['entries'])
- self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
- self.assertEqual(len(entries), 25)
- self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
-
def test_youtube_mix(self):
dl = FakeYDL()
- ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
- entries = result['entries']
- self.assertTrue(len(entries) >= 50)
+ dl.params['format'] = 'best'
+ ie = YoutubeTabIE(dl)
+ result = dl.extract_info('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8',
+ download=False, ie_key=ie.ie_key(), process=True)
+ entries = (result or {}).get('entries', [{'id': 'not_found', }])
+ self.assertTrue(len(entries) >= 25)
original_video = entries[0]
- self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
+ self.assertEqual(original_video['id'], 'tyITL_exICo')
- def test_youtube_toptracks(self):
- print('Skipping: The playlist page gives error 500')
- return
- dl = FakeYDL()
- ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
- entries = result['entries']
- self.assertEqual(len(entries), 100)
-
- def test_youtube_flat_playlist_titles(self):
+ def test_youtube_flat_playlist_extraction(self):
dl = FakeYDL()
dl.params['extract_flat'] = True
- ie = YoutubePlaylistIE(dl)
- result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
+ ie = YoutubeTabIE(dl)
+ result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
self.assertIsPlaylist(result)
- for entry in result['entries']:
- self.assertTrue(entry.get('title'))
+ entries = list(result['entries'])
+ self.assertTrue(len(entries) == 1)
+ video = entries[0]
+ self.assertEqual(video['_type'], 'url')
+ self.assertEqual(video['ie_key'], 'Youtube')
+ self.assertEqual(video['id'], 'BaW_jenozKc')
+ self.assertEqual(video['url'], 'BaW_jenozKc')
+ self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
+ self.assertEqual(video['duration'], 10)
+ self.assertEqual(video['uploader'], 'Philipp Hagemeister')
if __name__ == '__main__':
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
new file mode 100644
index 000000000..e18e71101
--- /dev/null
+++ b/test/test_youtube_misc.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.extractor import YoutubeIE
+
+
+class TestYoutubeMisc(unittest.TestCase):
+ def test_youtube_extract(self):
+ assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
+ assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+ assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 69df30eda..56e92fac5 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -8,76 +8,192 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-import io
import re
import string
+from youtube_dl.compat import (
+ compat_open as open,
+ compat_str,
+ compat_urlretrieve,
+)
+
from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE
-from youtube_dl.compat import compat_str, compat_urlretrieve
+from youtube_dl.jsinterp import JSInterpreter
-_TESTS = [
+_SIG_TESTS = [
(
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
- 'js',
86,
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
- 'js',
85,
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
- 'js',
90,
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
- 'js',
84,
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
- 'js',
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
- 'js',
84,
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
- 'js',
83,
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
- 'js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
- 'js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
)
]
+_NSIG_TESTS = [
+ (
+ 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
+ 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
+ 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
+ ),
+ (
+ 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
+ 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
+ ),
+ (
+ 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
+ 'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
+ 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
+ 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
+ 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+ 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
+ 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
+ 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
+ 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
+ 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
+ '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
+ '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
+ '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
+ ),
+ (
+ 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
+ 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
+ '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
+ 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+ 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
+ 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
+ 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
+ '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
+ ),
+ (
+ 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
+ '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
+ 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
+ 'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
+ '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+ '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+ ),
+ (
+ 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
+ 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
+ ),
+]
+
class TestPlayerInfo(unittest.TestCase):
def test_youtube_extract_player_info(self):
PLAYER_URLS = (
+ ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
+ ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
# obsolete
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@@ -86,59 +202,70 @@ class TestPlayerInfo(unittest.TestCase):
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
- ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
- ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
)
for player_url, expected_player_id in PLAYER_URLS:
- expected_player_type = player_url.split('.')[-1]
- player_type, player_id = YoutubeIE._extract_player_info(player_url)
- self.assertEqual(player_type, expected_player_type)
+ player_id = YoutubeIE._extract_player_info(player_url)
self.assertEqual(player_id, expected_player_id)
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
- self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+ self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
if not os.path.exists(self.TESTDATA_DIR):
os.mkdir(self.TESTDATA_DIR)
+ def tearDown(self):
+ try:
+ for f in os.listdir(self.TESTDATA_DIR):
+ os.remove(f)
+ except OSError:
+ pass
-def make_tfunc(url, stype, sig_input, expected_sig):
- m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
- assert m, '%r should follow URL format' % url
- test_id = m.group(1)
- def test_func(self):
- basename = 'player-%s.%s' % (test_id, stype)
- fn = os.path.join(self.TESTDATA_DIR, basename)
+def t_factory(name, sig_func, url_pattern):
+ def make_tfunc(url, sig_input, expected_sig):
+ m = url_pattern.match(url)
+ assert m, '%r should follow URL format' % url
+ test_id = m.group('id')
- if not os.path.exists(fn):
- compat_urlretrieve(url, fn)
+ def test_func(self):
+ basename = 'player-{0}-{1}.js'.format(name, test_id)
+ fn = os.path.join(self.TESTDATA_DIR, basename)
- ydl = FakeYDL()
- ie = YoutubeIE(ydl)
- if stype == 'js':
- with io.open(fn, encoding='utf-8') as testf:
+ if not os.path.exists(fn):
+ compat_urlretrieve(url, fn)
+ with open(fn, encoding='utf-8') as testf:
jscode = testf.read()
- func = ie._parse_sig_js(jscode)
- else:
- assert stype == 'swf'
- with open(fn, 'rb') as testf:
- swfcode = testf.read()
- func = ie._parse_sig_swf(swfcode)
- src_sig = (
- compat_str(string.printable[:sig_input])
- if isinstance(sig_input, int) else sig_input)
- got_sig = func(src_sig)
- self.assertEqual(got_sig, expected_sig)
+ self.assertEqual(sig_func(jscode, sig_input), expected_sig)
- test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
- setattr(TestSignature, test_func.__name__, test_func)
+ test_func.__name__ = str('test_{0}_js_{1}'.format(name, test_id))
+ setattr(TestSignature, test_func.__name__, test_func)
+ return make_tfunc
-for test_spec in _TESTS:
- make_tfunc(*test_spec)
+def signature(jscode, sig_input):
+ func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+ src_sig = (
+ compat_str(string.printable[:sig_input])
+ if isinstance(sig_input, int) else sig_input)
+ return func(src_sig)
+
+
+def n_sig(jscode, sig_input):
+ funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
+ return JSInterpreter(jscode).call_function(funcname, sig_input)
+
+
+make_sig_test = t_factory(
+ 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+for test_spec in _SIG_TESTS:
+ make_sig_test(*test_spec)
+
+make_nsig_test = t_factory(
+ 'nsig', n_sig, re.compile(r'.+/player/(?P[a-zA-Z0-9_-]+)/.+.js$'))
+for test_spec in _NSIG_TESTS:
+ make_nsig_test(*test_spec)
if __name__ == '__main__':
diff --git a/test/testdata/mpd/range_only.mpd b/test/testdata/mpd/range_only.mpd
new file mode 100644
index 000000000..e0c2152d1
--- /dev/null
+++ b/test/testdata/mpd/range_only.mpd
@@ -0,0 +1,35 @@
+
+
+
+
+ manifest.mpd generated by GPAC
+
+
+
+
+
+ video_dashinit.mp4
+
+
+
+
+
+
+
+
+
+
+
+
+ audio_dashinit.mp4
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 000000000..6f948adba
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+
+
+
+
+ dash/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/testdata/mpd/url_and_range.mpd b/test/testdata/mpd/url_and_range.mpd
new file mode 100644
index 000000000..b8c68aad2
--- /dev/null
+++ b/test/testdata/mpd/url_and_range.mpd
@@ -0,0 +1,32 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index efd42fa63..9e5620eef 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,11 +4,10 @@
from __future__ import absolute_import, unicode_literals
import collections
-import contextlib
import copy
import datetime
import errno
-import fileinput
+import functools
import io
import itertools
import json
@@ -26,25 +25,39 @@ import tokenize
import traceback
import random
+try:
+ from ssl import OPENSSL_VERSION
+except ImportError:
+ # Must be Python 2.6, should be built against 1.0.2
+ OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
from string import ascii_letters
from .compat import (
compat_basestring,
- compat_cookiejar,
+ compat_collections_chain_map as ChainMap,
+ compat_filter as filter,
compat_get_terminal_size,
compat_http_client,
+ compat_http_cookiejar_Cookie,
+ compat_http_cookies_SimpleCookie,
+ compat_integer_types,
compat_kwargs,
+ compat_map as map,
compat_numeric_types,
+ compat_open as open,
compat_os_name,
compat_str,
compat_tokenize_tokenize,
compat_urllib_error,
+ compat_urllib_parse,
compat_urllib_request,
compat_urllib_request_DataHandler,
)
from .utils import (
+ _UnsafeExtensionError,
age_restricted,
args_to_str,
+ bug_reports_message,
ContentTooShortError,
date_from_str,
DateRange,
@@ -62,7 +75,9 @@ from .utils import (
GeoRestrictedError,
int_or_none,
ISO3166Utils,
+ join_nonempty,
locked_file,
+ LazyList,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
@@ -73,6 +88,7 @@ from .utils import (
PostProcessingError,
preferredencoding,
prepend_extension,
+ process_communicate_or_kill,
register_socks_protocols,
render_table,
replace_extension,
@@ -84,6 +100,7 @@ from .utils import (
std_headers,
str_or_none,
subtitles_filename,
+ traverse_obj,
UnavailableVideoError,
url_basename,
version_tuple,
@@ -93,6 +110,7 @@ from .utils import (
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
+ ytdl_is_updateable,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -113,6 +131,20 @@ if compat_os_name == 'nt':
import ctypes
+def _catch_unsafe_file_extension(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except _UnsafeExtensionError as error:
+ self.report_error(
+ '{0} found; to avoid damaging your system, this value is disallowed.'
+ ' If you believe this is an error{1}'.format(
+ error_to_compat_str(error), bug_reports_message(',')))
+
+ return wrapper
+
+
class YoutubeDL(object):
"""YoutubeDL class.
@@ -163,6 +195,7 @@ class YoutubeDL(object):
simulate: Do not download the video files.
format: Video format code. See options.py for more information.
outtmpl: Template for output names.
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
ignoreerrors: Do not stop on download errors.
force_generic_extractor: Force downloader to use the generic extractor
@@ -361,6 +394,9 @@ class YoutubeDL(object):
self.params.update(params)
self.cache = Cache(self)
+ self._header_cookies = []
+ self._load_cookies_from_headers(self.params.get('http_headers'))
+
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
self.report_warning(
@@ -567,7 +603,7 @@ class YoutubeDL(object):
if self.params.get('cookiefile') is not None:
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
- def trouble(self, message=None, tb=None):
+ def trouble(self, *args, **kwargs):
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
@@ -576,6 +612,11 @@ class YoutubeDL(object):
tb, if given, is additional traceback information.
"""
+ # message=None, tb=None, is_error=True
+ message = args[0] if len(args) > 0 else kwargs.get('message', None)
+ tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
+ is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
+
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
@@ -588,7 +629,10 @@ class YoutubeDL(object):
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
- self.to_stderr(tb)
+ if tb:
+ self.to_stderr(tb)
+ if not is_error:
+ return
if not self.params.get('ignoreerrors', False):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
exc_info = sys.exc_info()[1].exc_info
@@ -597,11 +641,18 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def report_warning(self, message):
+ def report_warning(self, message, only_once=False, _cache={}):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
+ if only_once:
+ m_hash = hash((self, message))
+ m_cnt = _cache.setdefault(m_hash, 0)
+ _cache[m_hash] = m_cnt + 1
+ if m_cnt > 0:
+ return
+
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
@@ -614,7 +665,7 @@ class YoutubeDL(object):
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
- def report_error(self, message, tb=None):
+ def report_error(self, message, *args, **kwargs):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
@@ -623,8 +674,18 @@ class YoutubeDL(object):
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
- error_message = '%s %s' % (_msg_header, message)
- self.trouble(error_message, tb)
+ kwargs['message'] = '%s %s' % (_msg_header, message)
+ self.trouble(*args, **kwargs)
+
+ def report_unscoped_cookies(self, *args, **kwargs):
+ # message=None, tb=False, is_error=False
+ if len(args) <= 2:
+ kwargs.setdefault('is_error', False)
+ if len(args) <= 0:
+ kwargs.setdefault(
+ 'message',
+ 'Unscoped cookies are not allowed: please specify some sort of scoping')
+ self.report_error(*args, **kwargs)
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
@@ -658,7 +719,7 @@ class YoutubeDL(object):
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@@ -678,8 +739,8 @@ class YoutubeDL(object):
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
@@ -719,7 +780,7 @@ class YoutubeDL(object):
filename = encodeFilename(filename, True).decode(preferredencoding())
return sanitize_path(filename)
except ValueError as err:
- self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict, incomplete):
@@ -772,11 +833,20 @@ class YoutubeDL(object):
def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False):
- '''
- Returns a list with a dictionary for each video we find.
- If 'download', also downloads the videos.
- extra_info is a dict containing the extra values to add to each result
- '''
+ """
+ Return a list with a dictionary for each video extracted.
+
+ Arguments:
+ url -- URL to extract
+
+ Keyword arguments:
+ download -- whether to download videos during extraction
+ ie_key -- extractor key hint
+ extra_info -- dictionary containing the extra values to add to each result
+ process -- whether to resolve all unresolved references (URLs, playlist items),
+ must be True for download to work.
+ force_generic_extractor -- force using the generic extractor
+ """
if not ie_key and force_generic_extractor:
ie_key = 'Generic'
@@ -811,7 +881,7 @@ class YoutubeDL(object):
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
+ self.report_error(compat_str(e), tb=e.format_traceback())
except MaxDownloadsReached:
raise
except Exception as e:
@@ -821,8 +891,83 @@ class YoutubeDL(object):
raise
return wrapper
+ def _remove_cookie_header(self, http_headers):
+ """Filters out `Cookie` header from an `http_headers` dict
+ The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
+ See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
+
+ @param http_headers An `http_headers` dict from which any `Cookie` header
+ should be removed, or None
+ """
+ return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
+
+ def _load_cookies(self, data, **kwargs):
+ """Loads cookies from a `Cookie` header
+
+ This tries to work around the security vulnerability of passing cookies to every domain.
+
+ @param data The Cookie header as a string to load the cookies from
+ @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
+ If `True`, save cookies for later to be stored in the jar with a limited scope
+ If a URL, save cookies in the jar with the domain of the URL
+ """
+ # autoscope=True (kw-only)
+ autoscope = kwargs.get('autoscope', True)
+
+ for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
+ if autoscope and any(cookie.values()):
+ raise ValueError('Invalid syntax in Cookie Header')
+
+ domain = cookie.get('domain') or ''
+ expiry = cookie.get('expires')
+ if expiry == '': # 0 is valid so we check for `''` explicitly
+ expiry = None
+ prepared_cookie = compat_http_cookiejar_Cookie(
+ cookie.get('version') or 0, cookie.key, cookie.value, None, False,
+ domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
+ bool(cookie.get('secure')), expiry, False, None, None, {})
+
+ if domain:
+ self.cookiejar.set_cookie(prepared_cookie)
+ elif autoscope is True:
+ self.report_warning(
+ 'Passing cookies as a header is a potential security risk; '
+ 'they will be scoped to the domain of the downloaded urls. '
+ 'Please consider loading cookies from a file or browser instead.',
+ only_once=True)
+ self._header_cookies.append(prepared_cookie)
+ elif autoscope:
+ self.report_warning(
+ 'The extractor result contains an unscoped cookie as an HTTP header. '
+ 'If you are specifying an input URL, ' + bug_reports_message(),
+ only_once=True)
+ self._apply_header_cookies(autoscope, [prepared_cookie])
+ else:
+ self.report_unscoped_cookies()
+
+ def _load_cookies_from_headers(self, headers):
+ self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
+
+ def _apply_header_cookies(self, url, cookies=None):
+ """This method applies stray header cookies to the provided url
+
+ This loads header cookies and scopes them to the domain provided in `url`.
+ While this is not ideal, it helps reduce the risk of them being sent to
+ an unintended destination.
+ """
+ parsed = compat_urllib_parse.urlparse(url)
+ if not parsed.hostname:
+ return
+
+ for cookie in map(copy.copy, cookies or self._header_cookies):
+ cookie.domain = '.' + parsed.hostname
+ self.cookiejar.set_cookie(cookie)
+
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
+ # Compat with passing cookies in http headers
+ self._apply_header_cookies(url)
+
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
@@ -848,7 +993,7 @@ class YoutubeDL(object):
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
- Take the result of the ie(may be modified) and resolve all unresolved
+ Take the result of the ie (may be modified) and resolve all unresolved
references (URLs, playlist items).
It will also download the videos if 'download'.
@@ -910,8 +1055,8 @@ class YoutubeDL(object):
elif result_type in ('playlist', 'multi_video'):
# Protect from infinite recursion due to recursively nested playlists
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
- webpage_url = ie_result['webpage_url']
- if webpage_url in self._playlist_urls:
+ webpage_url = ie_result.get('webpage_url') # not all pl/mv have this
+ if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
'[download] Skipping already downloaded playlist: %s'
% ie_result.get('title') or ie_result.get('id'))
@@ -919,6 +1064,10 @@ class YoutubeDL(object):
self._playlist_level += 1
self._playlist_urls.add(webpage_url)
+ new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result)
+ if new_result:
+ new_result.update(ie_result)
+ ie_result = new_result
try:
return self.__process_playlist(ie_result, download)
finally:
@@ -1375,17 +1524,16 @@ class YoutubeDL(object):
'abr': formats_info[1].get('abr'),
'ext': output_ext,
}
- video_selector, audio_selector = map(_build_selector_function, selector.selector)
def selector_function(ctx):
- for pair in itertools.product(
- video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ selector_fn = lambda x: _build_selector_function(x)(ctx)
+ for pair in itertools.product(*map(selector_fn, selector.selector)):
yield _merge(pair)
filters = [self._build_format_filter(f) for f in selector.filters]
def final_selector(ctx):
- ctx_copy = copy.deepcopy(ctx)
+ ctx_copy = dict(ctx)
for _filter in filters:
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
return selector_function(ctx_copy)
@@ -1420,29 +1568,73 @@ class YoutubeDL(object):
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector)
- def _calc_headers(self, info_dict):
- res = std_headers.copy()
+ def _calc_headers(self, info_dict, load_cookies=False):
+ if load_cookies: # For --load-info-json
+ # load cookies from http_headers in legacy info.json
+ self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
+ autoscope=info_dict['url'])
+ # load scoped cookies from info.json
+ self._load_cookies(info_dict.get('cookies'), autoscope=False)
- add_headers = info_dict.get('http_headers')
- if add_headers:
- res.update(add_headers)
-
- cookies = self._calc_cookies(info_dict)
+ cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies:
- res['Cookie'] = cookies
+ # Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
+ # By convention a cookie name can't be a well-known attribute name
+ # so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
+ encoder = compat_http_cookies_SimpleCookie()
+ values = []
+ attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
+ attributes = tuple([x[0].lower()] + list(x) for x in attributes)
+ for cookie in cookies:
+ _, value = encoder.value_encode(cookie.value)
+ # Py 2 '' --> '', Py 3 '' --> '""'
+ if value == '':
+ value = '""'
+ values.append('='.join((cookie.name, value)))
+ for attr in attributes:
+ value = getattr(cookie, attr[0], None)
+ if value:
+ values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
+ info_dict['cookies'] = '; '.join(values)
+
+ res = std_headers.copy()
+ res.update(info_dict.get('http_headers') or {})
+ res = self._remove_cookie_header(res)
if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
if x_forwarded_for_ip:
res['X-Forwarded-For'] = x_forwarded_for_ip
- return res
+ return res or None
def _calc_cookies(self, info_dict):
pr = sanitized_Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
+ def _fill_common_fields(self, info_dict, final=True):
+
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ if final:
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@@ -1510,20 +1702,7 @@ class YoutubeDL(object):
if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
-
- # Auto generate title fields corresponding to the *_number fields when missing
- # in order to always have clean titles. This is very common for TV series.
- for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
- info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ self._fill_common_fields(info_dict)
for cc_kind in ('subtitles', 'automatic_captions'):
cc = info_dict.get(cc_kind)
@@ -1555,9 +1734,6 @@ class YoutubeDL(object):
else:
formats = info_dict['formats']
- if not formats:
- raise ExtractorError('No video formats found!')
-
def is_wellformed(f):
url = f.get('url')
if not url:
@@ -1570,7 +1746,10 @@ class YoutubeDL(object):
return True
# Filter out malformed formats for better extraction robustness
- formats = list(filter(is_wellformed, formats))
+ formats = list(filter(is_wellformed, formats or []))
+
+ if not formats:
+ raise ExtractorError('No video formats found!')
formats_dict = {}
@@ -1611,10 +1790,13 @@ class YoutubeDL(object):
format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the
# json output
- full_format_info = info_dict.copy()
- full_format_info.update(format)
- format['http_headers'] = self._calc_headers(full_format_info)
- # Remove private housekeeping stuff
+ format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
+
+ # Safeguard against old/insecure infojson when using --load-info-json
+ info_dict['http_headers'] = self._remove_cookie_header(
+ info_dict.get('http_headers') or {}) or None
+
+ # Remove private housekeeping stuff (copied to http_headers in _calc_headers())
if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip']
@@ -1757,17 +1939,17 @@ class YoutubeDL(object):
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+ self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
+ @_catch_unsafe_file_extension
def process_info(self, info_dict):
"""Process a single resolved IE result."""
assert info_dict.get('_type', 'video') == 'video'
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
+ if self._num_downloads >= max_downloads:
+ raise MaxDownloadsReached()
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
@@ -1818,7 +2000,7 @@ class YoutubeDL(object):
else:
try:
self.to_screen('[info] Writing video description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
except (OSError, IOError):
self.report_error('Cannot write description file ' + descfn)
@@ -1833,7 +2015,7 @@ class YoutubeDL(object):
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations'])
except (KeyError, TypeError):
self.report_warning('There are no annotations to write.')
@@ -1860,7 +2042,7 @@ class YoutubeDL(object):
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -1869,36 +2051,41 @@ class YoutubeDL(object):
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ with open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
- if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
- else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
- try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
- except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
- return
+ self._write_info_json(
+ 'video description', info_dict,
+ replace_extension(filename, 'info.json', info_dict.get('ext')))
self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
try:
+ def checked_get_suitable_downloader(info_dict, params):
+ ed_args = params.get('external_downloader_args')
+ dler = get_suitable_downloader(info_dict, params)
+ if ed_args and not params.get('external_downloader_args'):
+ # external_downloader_args was cleared because external_downloader was rejected
+ self.report_warning('Requested external downloader cannot be used: '
+ 'ignoring --external-downloader-args.')
+ return dler
+
def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
+ fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+
+ new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
+ new_info['http_headers'] = self._calc_headers(new_info)
+
+ return fd.download(name, new_info)
if info_dict.get('requested_formats') is not None:
downloaded = []
@@ -1927,18 +2114,26 @@ class YoutubeDL(object):
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
+ exts = [info_dict['ext']]
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+ exts.append(info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ def correct_ext(filename, ext=exts[1]):
+ if filename == '-':
+ return filename
+ f_name, f_real_ext = os.path.splitext(filename)
+ f_real_ext = f_real_ext[1:]
+ filename_wo_ext = f_name if f_real_ext in exts else filename
+ if ext is None:
+ ext = f_real_ext or None
+ return join_nonempty(filename_wo_ext, ext, delim='.')
+
+ filename = correct_ext(filename)
if os.path.exists(encodeFilename(filename)):
self.to_screen(
'[download] %s has already been downloaded and '
@@ -1948,8 +2143,9 @@ class YoutubeDL(object):
new_info = dict(info_dict)
new_info.update(f)
fname = prepend_extension(
- self.prepare_filename(new_info),
- 'f%s' % f['format_id'], new_info['ext'])
+ correct_ext(
+ self.prepare_filename(new_info), new_info['ext']),
+ 'f%s' % (f['format_id'],), new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
@@ -2035,9 +2231,12 @@ class YoutubeDL(object):
try:
self.post_process(filename, info_dict)
except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
+ self.report_error('postprocessing: %s' % error_to_compat_str(err))
return
self.record_download_archive(info_dict)
+ # avoid possible nugatory search for further items (PR #26638)
+ if self._num_downloads >= max_downloads:
+ raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
@@ -2060,16 +2259,13 @@ class YoutubeDL(object):
raise
else:
if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
+ self.to_stdout(json.dumps(self.sanitize_info(res)))
return self._download_retcode
def download_with_info_file(self, info_filename):
- with contextlib.closing(fileinput.FileInput(
- [info_filename], mode='r',
- openhook=fileinput.hook_encoded('utf-8'))) as f:
- # FileInput doesn't have a read method, we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
+ with open(info_filename, encoding='utf-8') as f:
+ info = self.filter_requested_info(json.load(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:
@@ -2082,10 +2278,36 @@ class YoutubeDL(object):
return self._download_retcode
@staticmethod
- def filter_requested_info(info_dict):
- return dict(
- (k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
+ def sanitize_info(info_dict, remove_private_keys=False):
+ ''' Sanitize the infodict for converting to json '''
+ if info_dict is None:
+ return info_dict
+
+ if remove_private_keys:
+ reject = lambda k, v: (v is None
+ or k.startswith('__')
+ or k in ('requested_formats',
+ 'requested_subtitles'))
+ else:
+ reject = lambda k, v: False
+
+ def filter_fn(obj):
+ if isinstance(obj, dict):
+ return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
+ elif isinstance(obj, (list, tuple, set, LazyList)):
+ return list(map(filter_fn, obj))
+ elif obj is None or any(isinstance(obj, c)
+ for c in (compat_integer_types,
+ (compat_str, float, bool))):
+ return obj
+ else:
+ return repr(obj)
+
+ return filter_fn(info_dict)
+
+ @classmethod
+ def filter_requested_info(cls, info_dict):
+ return cls.sanitize_info(info_dict, True)
def post_process(self, filename, ie_info):
"""Run all the postprocessors on the given file."""
@@ -2292,18 +2514,21 @@ class YoutubeDL(object):
self.get_encoding()))
write_string(encoding_str, encoding=None)
- self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
+ writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
+ writeln_debug('youtube-dl version ', __version__)
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ writeln_debug('Lazy loading extractors enabled')
+ if ytdl_is_updateable():
+ writeln_debug('Single file build')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ writeln_debug('Git HEAD: ', out)
except Exception:
try:
sys.exc_clear()
@@ -2316,9 +2541,22 @@ class YoutubeDL(object):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
- self._write_string('[debug] Python version %s (%s) - %s\n' % (
- platform.python_version(), python_implementation(),
- platform_name()))
+ def libc_ver():
+ try:
+ return platform.libc_ver()
+ except OSError: # We may not have access to the executable
+ return []
+
+ libc = join_nonempty(*libc_ver(), delim=' ')
+ writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
+ platform.python_version(),
+ python_implementation(),
+ platform.machine(),
+ platform.architecture()[0],
+ platform_name(),
+ OPENSSL_VERSION,
+ (' - %s' % (libc, )) if libc else ''
+ ))
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
@@ -2330,17 +2568,17 @@ class YoutubeDL(object):
)
if not exe_str:
exe_str = 'none'
- self._write_string('[debug] exe versions: %s\n' % exe_str)
+ writeln_debug('exe versions: %s' % (exe_str, ))
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
- self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+ writeln_debug('Proxy map: ', compat_str(proxy_map))
if self.params.get('call_home', False):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ writeln_debug('Public IP address: %s' % (ipaddr, ))
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
@@ -2357,7 +2595,7 @@ class YoutubeDL(object):
opts_proxy = self.params.get('proxy')
if opts_cookiefile is None:
- self.cookiejar = compat_cookiejar.CookieJar()
+ self.cookiejar = YoutubeDLCookieJar()
else:
opts_cookiefile = expand_path(opts_cookiefile)
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
@@ -2418,6 +2656,28 @@ class YoutubeDL(object):
encoding = preferredencoding()
return encoding
+ def _write_info_json(self, label, info_dict, infofn, overwrite=None):
+ if not self.params.get('writeinfojson', False):
+ return False
+
+ def msg(fmt, lbl):
+ return fmt % (lbl + ' metadata',)
+
+ if overwrite is None:
+ overwrite = not self.params.get('nooverwrites', False)
+
+ if not overwrite and os.path.exists(encodeFilename(infofn)):
+ self.to_screen(msg('[info] %s is already present', label.title()))
+ return 'exists'
+ else:
+ self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
+ try:
+ write_json_file(self.filter_requested_info(info_dict), infofn)
+ return True
+ except (OSError, IOError):
+ self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
+ return
+
def _write_thumbnails(self, info_dict, filename):
if self.params.get('writethumbnail', False):
thumbnails = info_dict.get('thumbnails')
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 9a659fc65..06bdfb689 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -5,7 +5,6 @@ from __future__ import unicode_literals
__license__ = 'Public Domain'
-import codecs
import io
import os
import random
@@ -17,10 +16,12 @@ from .options import (
)
from .compat import (
compat_getpass,
+ compat_register_utf8,
compat_shlex_split,
workaround_optparse_bug9161,
)
from .utils import (
+ _UnsafeExtensionError,
DateRange,
decodeOption,
DEFAULT_OUTTMPL,
@@ -46,10 +47,8 @@ from .YoutubeDL import YoutubeDL
def _real_main(argv=None):
- # Compatibility fixes for Windows
- if sys.platform == 'win32':
- # https://github.com/ytdl-org/youtube-dl/issues/820
- codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+ # Compatibility fix for Windows
+ compat_register_utf8()
workaround_optparse_bug9161()
@@ -175,6 +174,9 @@ def _real_main(argv=None):
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+ if opts.no_check_extensions:
+ _UnsafeExtensionError.lenient = True
+
def parse_retries(retries):
if retries in ('inf', 'infinite'):
parsed_retries = float('inf')
@@ -340,6 +342,7 @@ def _real_main(argv=None):
'format': opts.format,
'listformats': opts.listformats,
'outtmpl': outtmpl,
+ 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,
'restrictfilenames': opts.restrictfilenames,
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index 461bb6d41..a94a41079 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
+def pkcs7_padding(data):
+ """
+ PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @returns {int[]} padding data
+ """
+
+ remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
+ return data + [remaining_length] * remaining_length
+
+
def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
@@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
- remaining_length = BLOCK_SIZE_BYTES - len(block)
- block += [remaining_length] * remaining_length
+ block = pkcs7_padding(block)
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
@@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
return encrypted_data
+def aes_ecb_encrypt(data, key):
+ """
+ Encrypt with aes in ECB mode. Using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block = pkcs7_padding(block)
+
+ encrypted_block = aes_encrypt(block, expanded_key)
+ encrypted_data += encrypted_block
+
+ return encrypted_data
+
+
def key_expansion(data):
"""
Generate key schedule
@@ -303,7 +336,7 @@ def xor(data1, data2):
def rijndael_mul(a, b):
- if(a == 0 or b == 0):
+ if (a == 0 or b == 0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index 7bdade1bd..54123da0e 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -1,21 +1,32 @@
from __future__ import unicode_literals
import errno
-import io
import json
import os
import re
import shutil
import traceback
-from .compat import compat_getenv
+from .compat import (
+ compat_getenv,
+ compat_open as open,
+)
from .utils import (
+ error_to_compat_str,
expand_path,
+ is_outdated_version,
+ try_get,
write_json_file,
)
+from .version import __version__
class Cache(object):
+
+ _YTDL_DIR = 'youtube-dl'
+ _VERSION_KEY = _YTDL_DIR + '_version'
+ _DEFAULT_VERSION = '2021.12.17'
+
def __init__(self, ydl):
self._ydl = ydl
@@ -23,7 +34,7 @@ class Cache(object):
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
- res = os.path.join(cache_root, 'youtube-dl')
+ res = os.path.join(cache_root, self._YTDL_DIR)
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
@@ -50,13 +61,22 @@ class Cache(object):
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
- write_json_file(data, fn)
+ write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
- def load(self, section, key, dtype='json', default=None):
+ def _validate(self, data, min_ver):
+ version = try_get(data, lambda x: x[self._VERSION_KEY])
+ if not version: # Backward compatibility
+ data, version = {'data': data}, self._DEFAULT_VERSION
+ if not is_outdated_version(version, min_ver or '0', assume_new=False):
+ return data['data']
+ self._ydl.to_screen(
+ 'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
+
+ def load(self, section, key, dtype='json', default=None, min_ver=None):
assert dtype in ('json',)
if not self.enabled:
@@ -65,13 +85,13 @@ class Cache(object):
cache_fn = self._get_cache_fn(section, key, dtype)
try:
try:
- with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
- return json.load(cachef)
+ with open(cache_fn, 'r', encoding='utf-8') as cachef:
+ return self._validate(json.load(cachef), min_ver)
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
- file_size = str(oe)
+ file_size = error_to_compat_str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:
diff --git a/youtube_dl/casefold.py b/youtube_dl/casefold.py
new file mode 100644
index 000000000..ad9c66f8e
--- /dev/null
+++ b/youtube_dl/casefold.py
@@ -0,0 +1,1667 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .compat import (
+ compat_str,
+ compat_chr,
+)
+
+# Below is included the text of icu/CaseFolding.txt retrieved from
+# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
+# In case newly foldable Unicode characters are defined, paste the new version
+# of the text inside the ''' marks.
+# The text is expected to have only blank lines andlines with 1st character #,
+# all ignored, and fold definitions like this:
+# `from_hex_code; space_separated_to_hex_code_list; comment`
+
+_map_str = '''
+# CaseFolding-15.0.0.txt
+# Date: 2022-02-02, 23:35:35 GMT
+# © 2022 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# All code points not listed in this file map to themselves.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, including how to have case folding
+# preserve normalization formats, see Section 3.13 Default Case Algorithms in
+# The Unicode Standard.
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# ; ; ; #
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+# - For non-Turkic languages, this mapping is normally not used.
+# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+# Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
+# See the discussions of case mapping in the Unicode Standard for more information.
+#
+# Usage:
+# A. To do a simple case folding, use the mappings with status C + S.
+# B. To do a full case folding, use the mappings with status C + F.
+#
+# The mappings with status T can be used or omitted depending on the desired case-folding
+# behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+# Property: Case_Folding
+
+# All code points not explicitly listed for Case_Folding
+# have the value C for the status field, and the code point itself for the mapping field.
+
+# =================================================================
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
+023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
+023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
+023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
+0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
+0244; C; 0289; # LATIN CAPITAL LETTER U BAR
+0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
+0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
+0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
+024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
+024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0370; C; 0371; # GREEK CAPITAL LETTER HETA
+0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
+0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+037F; C; 03F3; # GREEK CAPITAL LETTER YOT
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
+03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
+03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
+03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
+0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
+0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
+0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
+0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
+051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
+051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
+051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
+0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
+052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
+052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
+052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
+10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
+10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
+10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
+10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
+10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
+10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
+10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
+10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
+10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
+10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
+10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
+10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
+10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
+10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
+10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
+10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
+10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
+10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
+10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
+10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
+10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
+10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
+10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
+10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
+10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
+10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
+10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
+10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
+10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
+10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
+10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
+10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
+10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
+10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
+10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
+10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
+10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
+10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
+13F8; C; 13F0; # CHEROKEE SMALL LETTER YE
+13F9; C; 13F1; # CHEROKEE SMALL LETTER YI
+13FA; C; 13F2; # CHEROKEE SMALL LETTER YO
+13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
+13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
+13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
+1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
+1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
+1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
+1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
+1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
+1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
+1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
+1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
+1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
+1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
+1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
+1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
+1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON
+1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN
+1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN
+1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN
+1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN
+1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN
+1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN
+1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS
+1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN
+1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR
+1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON
+1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR
+1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR
+1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE
+1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN
+1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR
+1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN
+1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR
+1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR
+1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN
+1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR
+1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN
+1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN
+1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN
+1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL
+1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL
+1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR
+1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN
+1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN
+1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE
+1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE
+1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE
+1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE
+1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR
+1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE
+1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI
+1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN
+1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI
+1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN
+1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN
+1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN
+1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
+1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
+1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2132; C; 214E; # TURNED CAPITAL F
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
+2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
+2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
+2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
+2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
+2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
+2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
+2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
+2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
+2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
+2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
+2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
+2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
+2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
+2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
+2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
+2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
+2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
+2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
+2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
+2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
+2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
+2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
+2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
+2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
+2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
+2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
+2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
+2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
+2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
+2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
+2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
+2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
+2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
+2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
+2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
+2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
+2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
+2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
+2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
+2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
+2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
+2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
+2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
+2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
+2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
+2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
+2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
+2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
+2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
+2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
+2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
+2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
+2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
+2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
+2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
+2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
+2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
+2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
+2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
+2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
+2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
+2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
+2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
+2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
+2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
+2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
+2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
+2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
+2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
+2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
+2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
+2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
+2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
+2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
+2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
+2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
+2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
+2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
+A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
+A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
+A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
+A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
+A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
+A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
+A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
+A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
+A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
+A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
+A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
+A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
+A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
+A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
+A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
+A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
+A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
+A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
+A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
+A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
+A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
+A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
+A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
+A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
+A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
+A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
+A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
+A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
+A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
+A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A726; C; A727; # LATIN CAPITAL LETTER HENG
+A728; C; A729; # LATIN CAPITAL LETTER TZ
+A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
+A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
+A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A732; C; A733; # LATIN CAPITAL LETTER AA
+A734; C; A735; # LATIN CAPITAL LETTER AO
+A736; C; A737; # LATIN CAPITAL LETTER AU
+A738; C; A739; # LATIN CAPITAL LETTER AV
+A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73C; C; A73D; # LATIN CAPITAL LETTER AY
+A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
+A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
+A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
+A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
+A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
+A74E; C; A74F; # LATIN CAPITAL LETTER OO
+A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
+A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
+A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
+A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A760; C; A761; # LATIN CAPITAL LETTER VY
+A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
+A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
+A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768; C; A769; # LATIN CAPITAL LETTER VEND
+A76A; C; A76B; # LATIN CAPITAL LETTER ET
+A76C; C; A76D; # LATIN CAPITAL LETTER IS
+A76E; C; A76F; # LATIN CAPITAL LETTER CON
+A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
+A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
+A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
+A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
+A780; C; A781; # LATIN CAPITAL LETTER TURNED L
+A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
+A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
+A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
+A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
+A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
+A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
+A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
+A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
+A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
+A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
+A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
+A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
+A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
+A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
+A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
+A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
+A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
+A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
+A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
+A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
+A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
+A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
+A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
+A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
+A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
+A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
+A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
+A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
+A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
+A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
+A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
+A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
+A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
+A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
+A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
+A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
+AB70; C; 13A0; # CHEROKEE SMALL LETTER A
+AB71; C; 13A1; # CHEROKEE SMALL LETTER E
+AB72; C; 13A2; # CHEROKEE SMALL LETTER I
+AB73; C; 13A3; # CHEROKEE SMALL LETTER O
+AB74; C; 13A4; # CHEROKEE SMALL LETTER U
+AB75; C; 13A5; # CHEROKEE SMALL LETTER V
+AB76; C; 13A6; # CHEROKEE SMALL LETTER GA
+AB77; C; 13A7; # CHEROKEE SMALL LETTER KA
+AB78; C; 13A8; # CHEROKEE SMALL LETTER GE
+AB79; C; 13A9; # CHEROKEE SMALL LETTER GI
+AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO
+AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU
+AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV
+AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA
+AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE
+AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI
+AB80; C; 13B0; # CHEROKEE SMALL LETTER HO
+AB81; C; 13B1; # CHEROKEE SMALL LETTER HU
+AB82; C; 13B2; # CHEROKEE SMALL LETTER HV
+AB83; C; 13B3; # CHEROKEE SMALL LETTER LA
+AB84; C; 13B4; # CHEROKEE SMALL LETTER LE
+AB85; C; 13B5; # CHEROKEE SMALL LETTER LI
+AB86; C; 13B6; # CHEROKEE SMALL LETTER LO
+AB87; C; 13B7; # CHEROKEE SMALL LETTER LU
+AB88; C; 13B8; # CHEROKEE SMALL LETTER LV
+AB89; C; 13B9; # CHEROKEE SMALL LETTER MA
+AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME
+AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI
+AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO
+AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU
+AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA
+AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA
+AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH
+AB91; C; 13C1; # CHEROKEE SMALL LETTER NE
+AB92; C; 13C2; # CHEROKEE SMALL LETTER NI
+AB93; C; 13C3; # CHEROKEE SMALL LETTER NO
+AB94; C; 13C4; # CHEROKEE SMALL LETTER NU
+AB95; C; 13C5; # CHEROKEE SMALL LETTER NV
+AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA
+AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE
+AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI
+AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO
+AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU
+AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV
+AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA
+AB9D; C; 13CD; # CHEROKEE SMALL LETTER S
+AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE
+AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI
+ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO
+ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU
+ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV
+ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA
+ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA
+ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE
+ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE
+ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI
+ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI
+ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO
+ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU
+ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV
+ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA
+ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA
+ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE
+ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI
+ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO
+ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU
+ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV
+ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA
+ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE
+ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI
+ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO
+ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU
+ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV
+ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA
+ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE
+ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI
+ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO
+ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU
+ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV
+ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
+10426; C; 1044E; # DESERET CAPITAL LETTER OI
+10427; C; 1044F; # DESERET CAPITAL LETTER EW
+104B0; C; 104D8; # OSAGE CAPITAL LETTER A
+104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
+104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
+104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
+104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
+104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
+104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
+104B7; C; 104DF; # OSAGE CAPITAL LETTER E
+104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
+104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
+104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
+104BB; C; 104E3; # OSAGE CAPITAL LETTER I
+104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
+104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
+104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
+104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
+104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
+104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
+104C2; C; 104EA; # OSAGE CAPITAL LETTER O
+104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
+104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
+104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
+104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
+104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
+104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
+104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
+104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
+104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
+104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
+104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
+104CE; C; 104F6; # OSAGE CAPITAL LETTER U
+104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
+104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
+104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
+104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
+104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
+10570; C; 10597; # VITHKUQI CAPITAL LETTER A
+10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
+10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
+10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
+10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
+10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
+10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
+10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
+10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
+10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
+1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
+1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
+1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
+1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
+1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
+10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
+10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
+10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
+10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
+10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
+10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
+10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
+10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
+10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
+10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
+1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
+1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
+1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
+1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
+1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
+10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
+10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
+10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
+10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
+10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
+10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
+10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
+10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
+10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB
+10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC
+10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC
+10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS
+10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED
+10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND
+10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E
+10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E
+10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE
+10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF
+10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG
+10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY
+10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH
+10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I
+10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II
+10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ
+10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK
+10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK
+10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK
+10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL
+10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY
+10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM
+10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN
+10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY
+10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O
+10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO
+10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
+10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
+10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE
+10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP
+10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP
+10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER
+10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER
+10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES
+10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ
+10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET
+10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT
+10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY
+10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH
+10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U
+10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU
+10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
+10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
+10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV
+10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ
+10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
+10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
+10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
+118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
+118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
+118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
+118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
+118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
+118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
+118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
+118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
+118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
+118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
+118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
+118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
+118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
+118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
+118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
+118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
+118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
+118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
+118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
+118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
+118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
+118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
+118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
+118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
+118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
+118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
+118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
+118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
+118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
+118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
+118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
+118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
+16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M
+16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S
+16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V
+16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W
+16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU
+16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z
+16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP
+16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P
+16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T
+16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G
+16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F
+16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I
+16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K
+16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A
+16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J
+16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E
+16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B
+16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C
+16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U
+16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU
+16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L
+16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q
+16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP
+16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY
+16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X
+16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D
+16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE
+16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N
+16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R
+16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
+16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
+16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
+1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
+1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
+1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
+1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
+1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
+1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
+1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
+1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
+1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
+1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
+1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
+1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
+1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
+1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
+1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
+1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
+1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
+1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
+1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
+1E913; C; 1E935; # ADLAM CAPITAL LETTER U
+1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
+1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
+1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
+1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
+1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
+1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
+1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
+1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
+1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
+1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
+1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
+1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
+1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
+1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
+'''
+
+
+def _parse_unichr(s):
+ s = int(s, 16)
+ try:
+ return compat_chr(s)
+ except ValueError:
+ # work around "unichr() arg not in range(0x10000) (narrow Python build)"
+ return ('\\U%08x' % s).decode('unicode-escape')
+
+
+_map = dict(
+ (_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' '))))
+ for from_, type_, to_, _ in (
+ l.split('; ', 3) for l in _map_str.splitlines() if l and not l[0] == '#')
+ if type_ in ('C', 'F'))
+del _map_str
+
+
+def casefold(s):
+ assert isinstance(s, compat_str)
+ return ''.join((_map.get(c, c) for c in s))
+
+
+__all__ = [
+ 'casefold',
+]
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 6c3d49d45..ed1a33cf2 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,10 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
+from __future__ import division
import base64
import binascii
import collections
import ctypes
+import datetime
import email
import getpass
import io
@@ -19,14 +21,64 @@ import socket
import struct
import subprocess
import sys
+import types
import xml.etree.ElementTree
+# naming convention
+# 'compat_' + Python3_name.replace('.', '_')
+# other aliases exist for convenience and/or legacy
+
+# deal with critical unicode/str things first
+try:
+ # Python 2
+ compat_str, compat_basestring, compat_chr = (
+ unicode, basestring, unichr
+ )
+except NameError:
+ compat_str, compat_basestring, compat_chr = (
+ str, (str, bytes), chr
+ )
+
+# casefold
+try:
+ compat_str.casefold
+ compat_casefold = lambda s: s.casefold()
+except AttributeError:
+ from .casefold import casefold as compat_casefold
+
+try:
+ import collections.abc as compat_collections_abc
+except ImportError:
+ import collections as compat_collections_abc
try:
import urllib.request as compat_urllib_request
except ImportError: # Python 2
import urllib2 as compat_urllib_request
+# Also fix up lack of method arg in old Pythons
+try:
+ type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
+except TypeError:
+ def _add_init_method_arg(cls):
+
+ init = cls.__init__
+
+ def wrapped_init(self, *args, **kwargs):
+ method = kwargs.pop('method', 'GET')
+ init(self, *args, **kwargs)
+ if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
+ # allow instance or its subclass to override get_method()
+ return
+ if self.has_data() and method == 'GET':
+ method = 'POST'
+ self.get_method = types.MethodType(lambda _: method, self)
+
+ cls.__init__ = wrapped_init
+
+ _add_init_method_arg(compat_urllib_request.Request)
+ del _add_init_method_arg
+
try:
import urllib.error as compat_urllib_error
except ImportError: # Python 2
@@ -36,26 +88,32 @@ try:
import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
import urllib as compat_urllib_parse
+ import urlparse as _urlparse
+ for a in dir(_urlparse):
+ if not hasattr(compat_urllib_parse, a):
+ setattr(compat_urllib_parse, a, getattr(_urlparse, a))
+ del _urlparse
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
+# unfavoured aliases
+compat_urlparse = compat_urllib_parse
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
try:
import urllib.response as compat_urllib_response
except ImportError: # Python 2
import urllib as compat_urllib_response
+try:
+ compat_urllib_response.addinfourl.status
+except AttributeError:
+ # .getcode() is deprecated in Py 3.
+ compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
+
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
import cookielib as compat_cookiejar
+compat_http_cookiejar = compat_cookiejar
if sys.version_info[0] == 2:
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
@@ -67,11 +125,35 @@ if sys.version_info[0] == 2:
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie
+compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
try:
import http.cookies as compat_cookies
except ImportError: # Python 2
import Cookie as compat_cookies
+compat_http_cookies = compat_cookies
+
+if sys.version_info[0] == 2 or sys.version_info < (3, 3):
+ class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+ def load(self, rawdata):
+ must_have_value = 0
+ if not isinstance(rawdata, dict):
+ if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
+ # attribute must have value for parsing
+ rawdata, must_have_value = re.subn(
+ r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
+ if sys.version_info[0] == 2:
+ if isinstance(rawdata, compat_str):
+ rawdata = str(rawdata)
+ super(compat_cookies_SimpleCookie, self).load(rawdata)
+ if must_have_value > 0:
+ for morsel in self.values():
+ for attr in ('secure', 'httponly'):
+ if morsel.get(attr):
+ morsel[attr] = True
+else:
+ compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
try:
import html.entities as compat_html_entities
@@ -2320,39 +2402,45 @@ try:
import http.client as compat_http_client
except ImportError: # Python 2
import httplib as compat_http_client
+try:
+ compat_http_client.HTTPResponse.getcode
+except AttributeError:
+ # Py < 3.1
+ compat_http_client.HTTPResponse.getcode = lambda self: self.status
try:
from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
+compat_urllib_HTTPError = compat_HTTPError
try:
from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve
+compat_urllib_request_urlretrieve = compat_urlretrieve
try:
+ from HTMLParser import (
+ HTMLParser as compat_HTMLParser,
+ HTMLParseError as compat_HTMLParseError)
+except ImportError: # Python 3
from html.parser import HTMLParser as compat_HTMLParser
-except ImportError: # Python 2
- from HTMLParser import HTMLParser as compat_HTMLParser
-
-try: # Python 2
- from HTMLParser import HTMLParseError as compat_HTMLParseError
-except ImportError: # Python <3.4
try:
from html.parser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python >3.4
-
- # HTMLParseError has been deprecated in Python 3.3 and removed in
+ # HTMLParseError was deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass
+compat_html_parser_HTMLParser = compat_HTMLParser
+compat_html_parser_HTMLParseError = compat_HTMLParseError
try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
+ _DEVNULL = subprocess.DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: _DEVNULL
+except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try:
@@ -2360,15 +2448,12 @@ try:
except ImportError:
import BaseHTTPServer as compat_http_server
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+ from urllib.parse import urlencode as compat_urllib_parse_urlencode
+ from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
else re.compile(r'([\x00-\x7f]+)'))
@@ -2435,9 +2520,6 @@ except ImportError: # Python 2
string = string.replace('+', ' ')
return compat_urllib_parse_unquote(string, encoding, errors)
-try:
- from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError: # Python 2
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
# Possible solutions are to either port it from python 3 with all
# the friends or manually ensure input query contains only byte strings.
@@ -2459,7 +2541,62 @@ except ImportError: # Python 2
def encode_list(l):
return [encode_elem(e) for e in l]
- return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+ return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
+
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, compat_str
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError('bad query field: %r' % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
+
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
+
+ setattr(compat_urllib_parse, '_urlencode',
+ getattr(compat_urllib_parse, 'urlencode'))
+ for name, fix in (
+ ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
+ ('parse_unquote', compat_urllib_parse_unquote),
+ ('unquote_plus', compat_urllib_parse_unquote_plus),
+ ('urlencode', compat_urllib_parse_urlencode),
+ ('parse_qs', compat_parse_qs)):
+ setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs
try:
from urllib.request import DataHandler as compat_urllib_request_DataHandler
@@ -2495,21 +2632,11 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-try:
- compat_basestring = basestring # Python 2
-except NameError:
- compat_basestring = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
+compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
etree = xml.etree.ElementTree
@@ -2523,10 +2650,11 @@ try:
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
# the following will crash with:
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
- isinstance(None, xml.etree.ElementTree.Element)
+ isinstance(None, etree.Element)
from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError: # Python <=2.6
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+compat_xml_etree_ElementTree_Element = compat_etree_Element
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
@@ -2582,6 +2710,7 @@ else:
if k == uri or v == prefix:
del etree._namespace_map[k]
etree._namespace_map[uri] = prefix
+compat_xml_etree_register_namespace = compat_etree_register_namespace
if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
@@ -2590,55 +2719,222 @@ if sys.version_info < (2, 7):
if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii')
return xpath
+
+ # further code below based on CPython 2.7 source
+ import functools
+
+ _xpath_tokenizer_re = re.compile(r'''(?x)
+ ( # (1)
+ '[^']*'|"[^"]*"| # quoted strings, or
+ ::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials
+ )| # or (2)
+ ((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials
+ \s+ # or white space
+ ''')
+
+ def _xpath_tokenizer(pattern, namespaces=None):
+ for token in _xpath_tokenizer_re.findall(pattern):
+ tag = token[1]
+ if tag and tag[0] != "{" and ":" in tag:
+ try:
+ if not namespaces:
+ raise KeyError
+ prefix, uri = tag.split(":", 1)
+ yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+ except KeyError:
+ raise SyntaxError("prefix %r not found in prefix map" % prefix)
+ else:
+ yield token
+
+ def _get_parent_map(context):
+ parent_map = context.parent_map
+ if parent_map is None:
+ context.parent_map = parent_map = {}
+ for p in context.root.getiterator():
+ for e in p:
+ parent_map[e] = p
+ return parent_map
+
+ def _select(context, result, filter_fn=lambda *_: True):
+ for elem in result:
+ for e in elem:
+ if filter_fn(e, elem):
+ yield e
+
+ def _prepare_child(next_, token):
+ tag = token[1]
+ return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
+
+ def _prepare_star(next_, token):
+ return _select
+
+ def _prepare_self(next_, token):
+ return lambda _, result: (e for e in result)
+
+ def _prepare_descendant(next_, token):
+ token = next(next_)
+ if token[0] == "*":
+ tag = "*"
+ elif not token[0]:
+ tag = token[1]
+ else:
+ raise SyntaxError("invalid descendant")
+
+ def select(context, result):
+ for elem in result:
+ for e in elem.getiterator(tag):
+ if e is not elem:
+ yield e
+ return select
+
+ def _prepare_parent(next_, token):
+ def select(context, result):
+ # FIXME: raise error if .. is applied at toplevel?
+ parent_map = _get_parent_map(context)
+ result_map = {}
+ for elem in result:
+ if elem in parent_map:
+ parent = parent_map[elem]
+ if parent not in result_map:
+ result_map[parent] = None
+ yield parent
+ return select
+
+ def _prepare_predicate(next_, token):
+ signature = []
+ predicate = []
+ for token in next_:
+ if token[0] == "]":
+ break
+ if token[0] and token[0][:1] in "'\"":
+ token = "'", token[0][1:-1]
+ signature.append(token[0] or "-")
+ predicate.append(token[1])
+
+ def select(context, result, filter_fn=lambda _: True):
+ for elem in result:
+ if filter_fn(elem):
+ yield elem
+
+ signature = "".join(signature)
+ # use signature to determine predicate type
+ if signature == "@-":
+ # [@attribute] predicate
+ key = predicate[1]
+ return functools.partial(
+ select, filter_fn=lambda el: el.get(key) is not None)
+ if signature == "@-='":
+ # [@attribute='value']
+ key = predicate[1]
+ value = predicate[-1]
+ return functools.partial(
+ select, filter_fn=lambda el: el.get(key) == value)
+ if signature == "-" and not re.match(r"\d+$", predicate[0]):
+ # [tag]
+ tag = predicate[0]
+ return functools.partial(
+ select, filter_fn=lambda el: el.find(tag) is not None)
+ if signature == "-='" and not re.match(r"\d+$", predicate[0]):
+ # [tag='value']
+ tag = predicate[0]
+ value = predicate[-1]
+
+ def itertext(el):
+ for e in el.getiterator():
+ e = e.text
+ if e:
+ yield e
+
+ def select(context, result):
+ for elem in result:
+ for e in elem.findall(tag):
+ if "".join(itertext(e)) == value:
+ yield elem
+ break
+ return select
+ if signature == "-" or signature == "-()" or signature == "-()-":
+ # [index] or [last()] or [last()-index]
+ if signature == "-":
+ index = int(predicate[0]) - 1
+ else:
+ if predicate[0] != "last":
+ raise SyntaxError("unsupported function")
+ if signature == "-()-":
+ try:
+ index = int(predicate[2]) - 1
+ except ValueError:
+ raise SyntaxError("unsupported expression")
+ else:
+ index = -1
+
+ def select(context, result):
+ parent_map = _get_parent_map(context)
+ for elem in result:
+ try:
+ parent = parent_map[elem]
+ # FIXME: what if the selector is "*" ?
+ elems = list(parent.findall(elem.tag))
+ if elems[index] is elem:
+ yield elem
+ except (IndexError, KeyError):
+ pass
+ return select
+ raise SyntaxError("invalid predicate")
+
+ ops = {
+ "": _prepare_child,
+ "*": _prepare_star,
+ ".": _prepare_self,
+ "..": _prepare_parent,
+ "//": _prepare_descendant,
+ "[": _prepare_predicate,
+ }
+
+ _cache = {}
+
+ class _SelectorContext:
+ parent_map = None
+
+ def __init__(self, root):
+ self.root = root
+
+ ##
+ # Generate all matching objects.
+
+ def compat_etree_iterfind(elem, path, namespaces=None):
+ # compile selector pattern
+ if path[-1:] == "/":
+ path = path + "*" # implicit all (FIXME: keep this?)
+ try:
+ selector = _cache[path]
+ except KeyError:
+ if len(_cache) > 100:
+ _cache.clear()
+ if path[:1] == "/":
+ raise SyntaxError("cannot use absolute path on element")
+ tokens = _xpath_tokenizer(path, namespaces)
+ selector = []
+ for token in tokens:
+ if token[0] == "/":
+ continue
+ try:
+ selector.append(ops[token[0]](tokens, token))
+ except StopIteration:
+ raise SyntaxError("invalid path")
+ _cache[path] = selector
+ # execute selector pattern
+ result = [elem]
+ context = _SelectorContext(elem)
+ for select in selector:
+ result = select(context, result)
+ return result
+
+ # end of code based on CPython 2.7 source
+
+
else:
compat_xpath = lambda xpath: xpath
-
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, compat_str
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError('bad query field: %r' % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
+ compat_etree_iterfind = lambda element, match: element.iterfind(match)
compat_os_name = os._name if os.name == 'java' else os.name
@@ -2674,7 +2970,7 @@ except (AssertionError, UnicodeEncodeError):
def compat_ord(c):
- if type(c) is int:
+ if isinstance(c, int):
return c
else:
return ord(c)
@@ -2764,6 +3060,8 @@ else:
else:
compat_expanduser = os.path.expanduser
+compat_os_path_expanduser = compat_expanduser
+
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
@@ -2775,6 +3073,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
else:
compat_realpath = os.path.realpath
+compat_os_path_realpath = compat_realpath
+
if sys.version_info < (3, 0):
def compat_print(s):
@@ -2795,11 +3095,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
else:
compat_getpass = getpass.getpass
+compat_getpass_getpass = compat_getpass
+
+
try:
compat_input = raw_input
except NameError: # Python 3
compat_input = input
+
# Python < 2.6.5 require kwargs to be bytes
try:
def _testfunc(x):
@@ -2850,6 +3154,51 @@ else:
compat_socket_create_connection = socket.create_connection
+try:
+ from contextlib import suppress as compat_contextlib_suppress
+except ImportError:
+ class compat_contextlib_suppress(object):
+ _exceptions = None
+
+ def __init__(self, *exceptions):
+ super(compat_contextlib_suppress, self).__init__()
+ # TODO: [Base]ExceptionGroup (3.12+)
+ self._exceptions = exceptions
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ return exc_type is not None and issubclass(exc_type, self._exceptions or tuple())
+
+
+# subprocess.Popen context manager
+# avoids leaking handles if .communicate() is not called
+try:
+ _Popen = subprocess.Popen
+ # check for required context manager attributes
+ _Popen.__enter__ and _Popen.__exit__
+ compat_subprocess_Popen = _Popen
+except AttributeError:
+ # not a context manager - make one
+ from contextlib import contextmanager
+
+ @contextmanager
+ def compat_subprocess_Popen(*args, **kwargs):
+ popen = None
+ try:
+ popen = _Popen(*args, **kwargs)
+ yield popen
+ finally:
+ if popen:
+ for f in (popen.stdin, popen.stdout, popen.stderr):
+ if f:
+ # repeated .close() is OK, but just in case
+ with compat_contextlib_suppress(EnvironmentError):
+ f.close()
+ popen.wait()
+
+
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
@@ -2877,6 +3226,7 @@ else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)):
+ from .utils import process_communicate_or_kill
columns = compat_getenv('COLUMNS')
if columns:
columns = int(columns)
@@ -2893,7 +3243,7 @@ else:
sp = subprocess.Popen(
['stty', 'size'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
_lines, _columns = map(int, out.split())
except Exception:
_columns, _lines = _terminal_size(*fallback)
@@ -2904,15 +3254,16 @@ else:
lines = _lines
return _terminal_size(columns, lines)
+
try:
itertools.count(start=0, step=1)
compat_itertools_count = itertools.count
except TypeError: # Python 2.6
def compat_itertools_count(start=0, step=1):
- n = start
while True:
- yield n
- n += step
+ yield start
+ start += step
+
if sys.version_info >= (3, 0):
from tokenize import tokenize as compat_tokenize_tokenize
@@ -2953,6 +3304,24 @@ else:
compat_Struct = struct.Struct
+# compat_map/filter() returning an iterator, supposedly the
+# same versioning as for zip below
+try:
+ from future_builtins import map as compat_map
+except ImportError:
+ try:
+ from itertools import imap as compat_map
+ except ImportError:
+ compat_map = map
+
+try:
+ from future_builtins import filter as compat_filter
+except ImportError:
+ try:
+ from itertools import ifilter as compat_filter
+ except ImportError:
+ compat_filter = filter
+
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
@@ -2962,6 +3331,82 @@ except ImportError: # not 2.6+ or is 3.x
compat_zip = zip
+# method renamed between Py2/3
+try:
+ from itertools import zip_longest as compat_itertools_zip_longest
+except ImportError:
+ from itertools import izip_longest as compat_itertools_zip_longest
+
+
+# new class in collections
+try:
+ from collections import ChainMap as compat_collections_chain_map
+ # Py3.3's ChainMap is deficient
+ if sys.version_info < (3, 4):
+ raise ImportError
+except ImportError:
+ # Py <= 3.3
+ class compat_collections_chain_map(compat_collections_abc.MutableMapping):
+
+ maps = [{}]
+
+ def __init__(self, *maps):
+ self.maps = list(maps) or [{}]
+
+ def __getitem__(self, k):
+ for m in self.maps:
+ if k in m:
+ return m[k]
+ raise KeyError(k)
+
+ def __setitem__(self, k, v):
+ self.maps[0].__setitem__(k, v)
+ return
+
+ def __contains__(self, k):
+ return any((k in m) for m in self.maps)
+
+ def __delitem(self, k):
+ if k in self.maps[0]:
+ del self.maps[0][k]
+ return
+ raise KeyError(k)
+
+ def __delitem__(self, k):
+ self.__delitem(k)
+
+ def __iter__(self):
+ return itertools.chain(*reversed(self.maps))
+
+ def __len__(self):
+ return len(iter(self))
+
+ # to match Py3, don't del directly
+ def pop(self, k, *args):
+ if self.__contains__(k):
+ off = self.__getitem__(k)
+ self.__delitem(k)
+ return off
+ elif len(args) > 0:
+ return args[0]
+ raise KeyError(k)
+
+ def new_child(self, m=None, **kwargs):
+ m = m or {}
+ m.update(kwargs)
+ return compat_collections_chain_map(m, *self.maps)
+
+ @property
+ def parents(self):
+ return compat_collections_chain_map(*(self.maps[1:]))
+
+
+# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
+compat_re_Pattern = type(re.compile(''))
+# and on the type of a match
+compat_re_Match = type(re.match('a', 'a'))
+
+
if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str):
@@ -2970,6 +3415,8 @@ if sys.version_info < (3, 3):
else:
compat_b64decode = base64.b64decode
+compat_base64_b64decode = compat_b64decode
+
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
@@ -2989,25 +3436,97 @@ else:
return ctypes.WINFUNCTYPE(*args, **kwargs)
-__all__ = [
+if sys.version_info < (3, 0):
+ # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
+ def compat_open(file_, *args, **kwargs):
+ if len(args) > 6 or 'opener' in kwargs:
+ raise ValueError('open: unsupported argument "opener"')
+ return io.open(file_, *args, **kwargs)
+else:
+ compat_open = open
+
+
+# compat_register_utf8
+def compat_register_utf8():
+ if sys.platform == 'win32':
+ # https://github.com/ytdl-org/youtube-dl/issues/820
+ from codecs import register, lookup
+ register(
+ lambda name: lookup('utf-8') if name == 'cp65001' else None)
+
+
+# compat_datetime_timedelta_total_seconds
+try:
+ compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
+except AttributeError:
+ # Py 2.6
+ def compat_datetime_timedelta_total_seconds(td):
+ return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+# optional decompression packages
+# PyPi brotli package implements 'br' Content-Encoding
+try:
+ import brotli as compat_brotli
+except ImportError:
+ compat_brotli = None
+# PyPi ncompress package implements 'compress' Content-Encoding
+try:
+ import ncompress as compat_ncompress
+except ImportError:
+ compat_ncompress = None
+
+
+legacy = [
'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
- 'compat_Struct',
'compat_b64decode',
- 'compat_basestring',
- 'compat_chr',
'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies',
- 'compat_ctypes_WINFUNCTYPE',
+ 'compat_cookies_SimpleCookie',
'compat_etree_Element',
- 'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
+ 'compat_getpass',
+ 'compat_parse_qs',
+ 'compat_realpath',
+ 'compat_urllib_parse_parse_qs',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
+ 'compat_urllib_parse_urlencode',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+]
+
+
+__all__ = [
+ 'compat_html_parser_HTMLParseError',
+ 'compat_html_parser_HTMLParser',
+ 'compat_Struct',
+ 'compat_base64_b64decode',
+ 'compat_basestring',
+ 'compat_brotli',
+ 'compat_casefold',
+ 'compat_chr',
+ 'compat_collections_abc',
+ 'compat_collections_chain_map',
+ 'compat_datetime_timedelta_total_seconds',
+ 'compat_http_cookiejar',
+ 'compat_http_cookiejar_Cookie',
+ 'compat_http_cookies',
+ 'compat_http_cookies_SimpleCookie',
+ 'compat_contextlib_suppress',
+ 'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_fromstring',
+ 'compat_etree_iterfind',
+ 'compat_filter',
'compat_get_terminal_size',
'compat_getenv',
- 'compat_getpass',
+ 'compat_getpass_getpass',
'compat_html_entities',
'compat_html_entities_html5',
'compat_http_client',
@@ -3015,13 +3534,20 @@ __all__ = [
'compat_input',
'compat_integer_types',
'compat_itertools_count',
+ 'compat_itertools_zip_longest',
'compat_kwargs',
+ 'compat_map',
+ 'compat_ncompress',
'compat_numeric_types',
+ 'compat_open',
'compat_ord',
'compat_os_name',
- 'compat_parse_qs',
+ 'compat_os_path_expanduser',
+ 'compat_os_path_realpath',
'compat_print',
- 'compat_realpath',
+ 'compat_re_Match',
+ 'compat_re_Pattern',
+ 'compat_register_utf8',
'compat_setenv',
'compat_shlex_quote',
'compat_shlex_split',
@@ -3030,20 +3556,18 @@ __all__ = [
'compat_struct_pack',
'compat_struct_unpack',
'compat_subprocess_get_DEVNULL',
+ 'compat_subprocess_Popen',
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
- 'compat_urllib_parse_unquote',
- 'compat_urllib_parse_unquote_plus',
- 'compat_urllib_parse_unquote_to_bytes',
- 'compat_urllib_parse_urlencode',
- 'compat_urllib_parse_urlparse',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',
- 'compat_urlparse',
- 'compat_urlretrieve',
- 'compat_xml_parse_error',
+ 'compat_urllib_request_urlretrieve',
+ 'compat_urllib_HTTPError',
+ 'compat_xml_etree_ElementTree_Element',
+ 'compat_xml_etree_ElementTree_ParseError',
+ 'compat_xml_etree_register_namespace',
'compat_xpath',
'compat_zip',
'workaround_optparse_bug9161',
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..d701d6292 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -1,22 +1,31 @@
from __future__ import unicode_literals
+from ..utils import (
+ determine_protocol,
+)
+
+
+def get_suitable_downloader(info_dict, params={}):
+ info_dict['protocol'] = determine_protocol(info_dict)
+ info_copy = info_dict.copy()
+ return _get_suitable_downloader(info_copy, params)
+
+
+# Some of these require get_suitable_downloader
from .common import FileDownloader
+from .dash import DashSegmentsFD
from .f4m import F4mFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
+from .niconico import NiconicoDmcFD
from .external import (
get_external_downloader,
FFmpegFD,
)
-from ..utils import (
- determine_protocol,
-)
-
PROTOCOL_MAP = {
'rtmp': RtmpFD,
'm3u8_native': HlsFD,
@@ -26,13 +35,12 @@ PROTOCOL_MAP = {
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
+ 'niconico_dmc': NiconicoDmcFD,
}
-def get_suitable_downloader(info_dict, params={}):
+def _get_suitable_downloader(info_dict, params={}):
"""Get the downloader class that can handle the info dict."""
- protocol = determine_protocol(info_dict)
- info_dict['protocol'] = protocol
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
# return FFmpegFD
@@ -42,7 +50,11 @@ def get_suitable_downloader(info_dict, params={}):
ed = get_external_downloader(external_downloader)
if ed.can_download(info_dict):
return ed
+ # Avoid using unwanted args since external_downloader was rejected
+ if params.get('external_downloader_args'):
+ params['external_downloader_args'] = None
+ protocol = info_dict['protocol']
if protocol.startswith('m3u8') and info_dict.get('is_live'):
return FFmpegFD
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 1cdba89cd..91e691776 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -88,17 +88,21 @@ class FileDownloader(object):
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
- @staticmethod
- def calc_eta(start, now, total, current):
+ @classmethod
+ def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+ if len(args) < 2:
+ rate, remaining = (start_or_rate, now_or_remaining)
+ if None in (rate, remaining):
+ return None
+ return int(float(remaining) / rate)
+ start, now = (start_or_rate, now_or_remaining)
+ total, current = args[:2]
if total is None:
return None
if now is None:
now = time.time()
- dif = now - start
- if current == 0 or dif < 0.001: # One millisecond
- return None
- rate = float(current) / dif
- return int((float(total) - float(current)) / rate)
+ rate = cls.calc_speed(start, now, current)
+ return rate and int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
@@ -123,6 +127,12 @@ class FileDownloader(object):
def format_retries(retries):
return 'inf' if retries == float('inf') else '%.0f' % retries
+ @staticmethod
+ def filesize_or_none(unencoded_filename):
+ fn = encodeFilename(unencoded_filename)
+ if os.path.isfile(fn):
+ return os.path.getsize(fn)
+
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
@@ -329,6 +339,10 @@ class FileDownloader(object):
def download(self, filename, info_dict):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
+
+ This method filters the `Cookie` header from the info_dict to prevent leaks.
+ Downloaders have their own way of handling cookies.
+ See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
"""
nooverwrites_and_exists = (
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index c6d674bc6..f3c058879 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import itertools
+
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import (
@@ -30,26 +32,28 @@ class DashSegmentsFD(FragmentFD):
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- frag_index = 0
- for i, fragment in enumerate(fragments):
- frag_index += 1
+ for frag_index, fragment in enumerate(fragments, 1):
if frag_index <= ctx['fragment_index']:
continue
+ success = False
# In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment
- fatal = i == 0 or not skip_unavailable_fragments
- count = 0
- while count <= fragment_retries:
+ fatal = frag_index == 1 or not skip_unavailable_fragments
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ headers = info_dict.get('http_headers')
+ fragment_range = fragment.get('range')
+ if fragment_range:
+ headers = headers.copy() if headers else {}
+ headers['Range'] = 'bytes=%s' % (fragment_range,)
+ for count in itertools.count():
try:
- fragment_url = fragment.get('url')
- if not fragment_url:
- assert fragment_base_url
- fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers)
if not success:
return False
self._append_fragment(ctx, frag_content)
- break
except compat_urllib_error.HTTPError as err:
# YouTube may often return 404 HTTP error for a fragment causing the
# whole download to fail. However if the same fragment is immediately
@@ -57,22 +61,21 @@ class DashSegmentsFD(FragmentFD):
# is usually enough) thus allowing to download the whole file successfully.
# To be future-proof we will retry all fragments that fail with any
# HTTP error.
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count < fragment_retries:
+ self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
+ continue
except DownloadError:
# Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- self.report_skip_fragment(frag_index)
- break
- raise
+ # itself since it has its own retry settings
+ if fatal:
+ raise
+ break
- if count > fragment_retries:
+ if not success:
if not fatal:
self.report_skip_fragment(frag_index)
continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
+ self.report_error('giving up after %s fragment retries' % count)
return False
self._finish_frag_download(ctx)
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index c31f8910a..4fbc0f520 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -1,17 +1,24 @@
from __future__ import unicode_literals
-import os.path
+import os
import re
import subprocess
import sys
+import tempfile
import time
from .common import FileDownloader
from ..compat import (
compat_setenv,
compat_str,
+ compat_subprocess_Popen,
)
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+
+try:
+ from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+except ImportError:
+ FFmpegPostProcessor = None
+
from ..utils import (
cli_option,
cli_valueless_option,
@@ -22,6 +29,9 @@ from ..utils import (
handle_youtubedl_headers,
check_executable,
is_outdated_version,
+ process_communicate_or_kill,
+ T,
+ traverse_obj,
)
@@ -29,6 +39,7 @@ class ExternalFD(FileDownloader):
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
+ self._cookies_tempfile = None
try:
started = time.time()
@@ -41,6 +52,13 @@ class ExternalFD(FileDownloader):
# should take place
retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename())
+ finally:
+ if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
+ try:
+ os.remove(self._cookies_tempfile)
+ except OSError:
+ self.report_warning(
+ 'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
if retval == 0:
status = {
@@ -96,6 +114,16 @@ class ExternalFD(FileDownloader):
def _configuration_args(self, default=[]):
return cli_configuration_args(self.params, 'external_downloader_args', default)
+ def _write_cookies(self):
+ if not self.ydl.cookiejar.filename:
+ tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+ tmp_cookies.close()
+ self._cookies_tempfile = tmp_cookies.name
+ self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
+ # real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
+ self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
+ return self.ydl.cookiejar.filename or self._cookies_tempfile
+
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -104,18 +132,26 @@ class ExternalFD(FileDownloader):
p = subprocess.Popen(
cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate()
+ _, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
+ @staticmethod
+ def _header_items(info_dict):
+ return traverse_obj(
+ info_dict, ('http_headers', T(dict.items), Ellipsis))
+
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '--location', '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
+ cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += ['--cookie', cookie_header]
+ for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
@@ -141,7 +177,7 @@ class CurlFD(ExternalFD):
# curl writes the progress to stderr so don't capture it.
p = subprocess.Popen(cmd)
- p.communicate()
+ process_communicate_or_kill(p)
return p.returncode
@@ -150,8 +186,11 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
+ for key, val in self._header_items(info_dict):
cmd += ['-H', '%s: %s' % (key, val)]
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -161,8 +200,10 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
- for key, val in info_dict['http_headers'].items():
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+ if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+ cmd += ['--load-cookies', self._write_cookies()]
+ for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
@@ -171,7 +212,10 @@ class WgetFD(ExternalFD):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
- cmd += self._option('--proxy', 'proxy')
+ proxy = self.params.get('proxy')
+ if proxy:
+ for var in ('http_proxy', 'https_proxy'):
+ cmd += ['--execute', '%s=%s' % (var, proxy)]
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
@@ -181,24 +225,121 @@ class WgetFD(ExternalFD):
class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v'
+ @staticmethod
+ def _aria2c_filename(fn):
+ return fn if os.path.isabs(fn) else os.path.join('.', fn)
+
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-c']
- cmd += self._configuration_args([
- '--min-split-size', '1M', '--max-connection-per-server', '4'])
- dn = os.path.dirname(tmpfilename)
- if dn:
- cmd += ['--dir', dn]
- cmd += ['--out', os.path.basename(tmpfilename)]
- for key, val in info_dict['http_headers'].items():
+ cmd = [self.exe, '-c',
+ '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+ '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
+ if 'fragments' in info_dict:
+ cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+ else:
+ cmd += ['--min-split-size', '1M']
+
+ if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+ cmd += ['--load-cookies={0}'.format(self._write_cookies())]
+ for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._configuration_args(['--max-connection-per-server', '4'])
+ cmd += ['--out', os.path.basename(tmpfilename)]
+ cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
- cmd += ['--', info_dict['url']]
+ cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
+ cmd += self._configuration_args()
+
+ # aria2c strips out spaces from the beginning/end of filenames and paths.
+ # We work around this issue by adding a "./" to the beginning of the
+ # filename and relative path, and adding a "/" at the end of the path.
+ # See: https://github.com/yt-dlp/yt-dlp/issues/276
+ # https://github.com/ytdl-org/youtube-dl/issues/20312
+ # https://github.com/aria2/aria2/issues/1373
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
+ if 'fragments' not in info_dict:
+ cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
+ cmd += ['--auto-file-renaming=false']
+ if 'fragments' in info_dict:
+ cmd += ['--file-allocation=none', '--uri-selector=inorder']
+ url_list_file = '%s.frag.urls' % (tmpfilename, )
+ url_list = []
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+ url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
+ stream, _ = self.sanitize_open(url_list_file, 'wb')
+ stream.write('\n'.join(url_list).encode())
+ stream.close()
+ cmd += ['-i', self._aria2c_filename(url_list_file)]
+ else:
+ cmd += ['--', info_dict['url']]
return cmd
+class Aria2pFD(ExternalFD):
+ ''' Aria2pFD class
+ This class support to use aria2p as downloader.
+ (Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
+ through JSON-RPC.)
+ It can help you to get download progress more easily.
+ To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
+ Then run aria2c in the background and enable with the --enable-rpc option.
+ '''
+ try:
+ import aria2p
+ __avail = True
+ except ImportError:
+ __avail = False
+
+ @classmethod
+ def available(cls):
+ return cls.__avail
+
+ def _call_downloader(self, tmpfilename, info_dict):
+ aria2 = self.aria2p.API(
+ self.aria2p.Client(
+ host='http://localhost',
+ port=6800,
+ secret=''
+ )
+ )
+
+ options = {
+ 'min-split-size': '1M',
+ 'max-connection-per-server': 4,
+ 'auto-file-renaming': 'false',
+ }
+ options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
+ options['out'] = os.path.basename(tmpfilename)
+ if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+ options['load-cookies'] = self._write_cookies()
+ options['header'] = []
+ for key, val in self._header_items(info_dict):
+ options['header'].append('{0}: {1}'.format(key, val))
+ download = aria2.add_uris([info_dict['url']], options)
+ status = {
+ 'status': 'downloading',
+ 'tmpfilename': tmpfilename,
+ }
+ started = time.time()
+ while download.status in ['active', 'waiting']:
+ download = aria2.get_download(download.gid)
+ status.update({
+ 'downloaded_bytes': download.completed_length,
+ 'total_bytes': download.total_length,
+ 'elapsed': time.time() - started,
+ 'eta': download.eta.total_seconds(),
+ 'speed': download.download_speed,
+ })
+ self._hook_progress(status)
+ time.sleep(.5)
+ return download.status != 'complete'
+
+
class HttpieFD(ExternalFD):
@classmethod
def available(cls):
@@ -206,25 +347,34 @@ class HttpieFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
- for key, val in info_dict['http_headers'].items():
+ for key, val in self._header_items(info_dict):
cmd += ['%s:%s' % (key, val)]
+
+ # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+ # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+ # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+ # 2: https://httpie.io/docs/cli/sessions
+ cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+ if cookie_header:
+ cmd += ['Cookie:%s' % cookie_header]
return cmd
class FFmpegFD(ExternalFD):
@classmethod
def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+ return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
@classmethod
def available(cls):
- return FFmpegPostProcessor().available
+ # actual availability can only be confirmed for an instance
+ return bool(FFmpegPostProcessor)
def _call_downloader(self, tmpfilename, info_dict):
- url = info_dict['url']
- ffpp = FFmpegPostProcessor(downloader=self)
+ # `downloader` means the parent `YoutubeDL`
+ ffpp = FFmpegPostProcessor(downloader=self.ydl)
if not ffpp.available:
- self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
return False
ffpp.check_version()
@@ -253,7 +403,15 @@ class FFmpegFD(ExternalFD):
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]
- if info_dict['http_headers'] and re.match(r'^https?://', url):
+ url = info_dict['url']
+ cookies = self.ydl.cookiejar.get_cookies_for_url(url)
+ if cookies:
+ args.extend(['-cookies', ''.join(
+ '{0}={1}; path={2}; domain={3};\r\n'.format(
+ cookie.name, cookie.value, cookie.path, cookie.domain)
+ for cookie in cookies)])
+
+ if info_dict.get('http_headers') and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])
@@ -333,18 +491,25 @@ class FFmpegFD(ExternalFD):
self._debug_cmd(args)
- proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
- try:
- retval = proc.wait()
- except KeyboardInterrupt:
- # subprocces.run would send the SIGKILL signal to ffmpeg and the
- # mp4 file couldn't be played, but if we ask ffmpeg to quit it
- # produces a file that is playable (this is mostly useful for live
- # streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if sys.platform != 'win32':
- proc.communicate(b'q')
- raise
+ # From [1], a PIPE opened in Popen() should be closed, unless
+ # .communicate() is called. Avoid leaking any PIPEs by using Popen
+ # as a context manager (newer Python 3.x and compat)
+ # Fixes "Resource Warning" in test/test_downloader_external.py
+ # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
+ with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+ try:
+ retval = proc.wait()
+ except BaseException as e:
+ # subprocess.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
+ process_communicate_or_kill(proc, b'q')
+ else:
+ proc.kill()
+ raise
return retval
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 35c76feba..913e91b64 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
@staticmethod
def __do_ytdl_file(ctx):
- return not ctx['live'] and not ctx['tmpfilename'] == '-'
+ return ctx['live'] is not True and ctx['tmpfilename'] != '-'
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
@@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
}
+ frag_resume_len = 0
+ if ctx['dl'].params.get('continuedl', True):
+ frag_resume_len = self.filesize_or_none(
+ self.temp_name(fragment_filename))
+ fragment_info_dict['frag_resume_len'] = frag_resume_len
+ ctx['frag_resume_len'] = frag_resume_len or 0
+
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
@@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
- if 'live' not in ctx:
- ctx['live'] = False
- if not ctx['live']:
+ if not ctx.setdefault('live', False):
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
@@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename'])
+ continuedl = self.params.get('continuedl', True)
dl = HttpQuietDownloader(
self.ydl,
{
- 'continuedl': True,
+ 'continuedl': continuedl,
'quiet': True,
'noprogress': True,
'ratelimit': self.params.get('ratelimit'),
@@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
)
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
- resume_len = 0
# Establish possible resume length
- if os.path.isfile(encodeFilename(tmpfilename)):
+ resume_len = self.filesize_or_none(tmpfilename) or 0
+ if resume_len > 0:
open_mode = 'ab'
- resume_len = os.path.getsize(encodeFilename(tmpfilename))
# Should be initialized before ytdl file check
ctx.update({
@@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
})
if self.__do_ytdl_file(ctx):
- if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+ ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+ if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx)
is_corrupt = ctx.get('ytdl_corrupt') is True
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
self._write_ytdl_file(ctx)
+
else:
+ if not continuedl:
+ if ytdl_file_exists:
+ self._read_ytdl_file(ctx)
+ ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
@@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
start = time.time()
ctx.update({
'started': start,
+ 'fragment_started': start,
# Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation
'prev_frag_downloaded_bytes': 0,
@@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if not total_frags and ctx.get('fragment_count'):
+ state['fragment_count'] = ctx['fragment_count']
+
time_now = time.time()
state['elapsed'] = time_now - start
frag_total_bytes = s.get('total_bytes') or 0
@@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
ctx['fragment_index'] = state['fragment_index']
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['speed'] = state['speed'] = self.calc_speed(
+ ctx['fragment_started'], time_now, frag_total_bytes)
+ ctx['fragment_started'] = time.time()
ctx['prev_frag_downloaded_bytes'] = 0
else:
frag_downloaded_bytes = s['downloaded_bytes']
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+ ctx['speed'] = state['speed'] = self.calc_speed(
+ ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
if not ctx['live']:
- state['eta'] = self.calc_eta(
- start, time_now, estimated_size - resume_len,
- state['downloaded_bytes'] - resume_len)
- state['speed'] = s.get('speed') or ctx.get('speed')
- ctx['speed'] = state['speed']
+ state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state)
@@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
- downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+ downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
self._hook_progress({
'downloaded_bytes': downloaded_bytes,
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index d8ac41dcc..3cad87420 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
if self.params.get('continuedl', True):
# Establish possible resume length
- if os.path.isfile(encodeFilename(ctx.tmpfilename)):
- ctx.resume_len = os.path.getsize(
- encodeFilename(ctx.tmpfilename))
+ ctx.resume_len = info_dict.get('frag_resume_len')
+ if ctx.resume_len is None:
+ ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
ctx.is_resume = ctx.resume_len > 0
@@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
raise RetryDownload(err)
raise err
# When trying to resume, Content-Range HTTP header of response has to be checked
- # to match the value of requested Range HTTP header. This is due to a webservers
+ # to match the value of requested Range HTTP header. This is due to webservers
# that don't support resuming and serve a whole file with no Content-Range
- # set in response despite of requested Range (see
+ # set in response despite requested Range (see
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
@@ -141,7 +141,8 @@ class HttpFD(FileDownloader):
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
- self.report_unable_to_resume()
+ if range_start > 0:
+ self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
@@ -293,10 +294,7 @@ class HttpFD(FileDownloader):
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
- if ctx.data_len is None:
- eta = None
- else:
- eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+ eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
self._hook_progress({
'status': 'downloading',
diff --git a/youtube_dl/downloader/niconico.py b/youtube_dl/downloader/niconico.py
new file mode 100644
index 000000000..6392c9989
--- /dev/null
+++ b/youtube_dl/downloader/niconico.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+try:
+ import threading
+except ImportError:
+ threading = None
+
+from .common import FileDownloader
+from ..downloader import get_suitable_downloader
+from ..extractor.niconico import NiconicoIE
+from ..utils import sanitized_Request
+
+
+class NiconicoDmcFD(FileDownloader):
+ """ Downloading niconico douga from DMC with heartbeat """
+
+ FD_NAME = 'niconico_dmc'
+
+ def real_download(self, filename, info_dict):
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+ ie = NiconicoIE(self.ydl)
+ info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+ fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+
+ if not threading:
+ self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
+ return fd.real_download(filename, info_dict)
+
+ success = download_complete = False
+ timer = [None]
+ heartbeat_lock = threading.Lock()
+ heartbeat_url = heartbeat_info_dict['url']
+ heartbeat_data = heartbeat_info_dict['data'].encode()
+ heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+ request = sanitized_Request(heartbeat_url, heartbeat_data)
+
+ def heartbeat():
+ try:
+ self.ydl.urlopen(request).read()
+ except Exception:
+ self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+ with heartbeat_lock:
+ if not download_complete:
+ timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+ timer[0].start()
+
+ heartbeat_info_dict['ping']()
+ self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+ try:
+ heartbeat()
+ if type(fd).__name__ == 'HlsFD':
+ info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+ success = fd.real_download(filename, info_dict)
+ finally:
+ if heartbeat_lock:
+ with heartbeat_lock:
+ timer[0].cancel()
+ download_complete = True
+ return success
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index fbb7f51b0..8a25dbc8d 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -89,11 +89,13 @@ class RtmpFD(FileDownloader):
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
- finally:
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.wait()
+ except BaseException: # Including KeyboardInterrupt
+ proc.kill()
proc.wait()
- if not cursor_in_new_line:
- self.to_screen('')
- return proc.returncode
+ raise
url = info_dict['url']
player_url = info_dict.get('player_url')
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py
index 8b407bf9c..908c83377 100644
--- a/youtube_dl/extractor/abcnews.py
+++ b/youtube_dl/extractor/abcnews.py
@@ -1,14 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
-import calendar
import re
-import time
from .amp import AMPIE
from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import compat_urlparse
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ try_get,
+)
class AbcNewsVideoIE(AMPIE):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
(?:
abcnews\.go\.com/
(?:
- [^/]+/video/(?P[0-9a-z-]+)-|
- video/embed\?.*?\bid=
+ (?:[^/]+/)*video/(?P[0-9a-z-]+)-|
+ video/(?:embed|itemfeed)\?.*?\bid=
)|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
)
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
'duration': 180,
'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1380454200,
+ 'upload_date': '20130929',
},
'params': {
# m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
}, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True,
+ }, {
+ 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P[0-9a-z-]+)/story\?id=(?P\d+)'
_TESTS = [{
- 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+ # Youtube Embeds
+ 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
'info_dict': {
- 'id': '10505354',
- 'ext': 'flv',
- 'display_id': 'dramatic-video-rare-death-job-america',
- 'title': 'Occupational Hazards',
- 'description': 'Nightline investigates the dangers that lurk at various jobs.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20100428',
- 'timestamp': 1272412800,
+ 'id': '51286501',
+ 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+ 'description': 'Billingsley went from a child actor to Hollywood power player.',
},
- 'add_ie': ['AbcNewsVideo'],
+ 'playlist_count': 5,
}, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': {
'id': '38897857',
'ext': 'mp4',
- 'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single',
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
- 'upload_date': '20160515',
- 'timestamp': 1463329500,
+ 'upload_date': '20160505',
+ 'timestamp': 1462442280,
},
'params': {
# m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
}, {
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True,
+ }, {
+ # inline.type == 'video'
+ 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
- video_id = mobj.group('id')
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+ story = self._parse_json(self._search_regex(
+ r"window\['__abcnews__'\]\s*=\s*({.+?});",
+ webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+ article_contents = story.get('articleContents') or {}
- webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
- full_video_url = compat_urlparse.urljoin(url, video_url)
+ def entries():
+ featured_video = story.get('featuredVideo') or {}
+ feed = try_get(featured_video, lambda x: x['video']['feed'])
+ if feed:
+ yield {
+ '_type': 'url',
+ 'id': featured_video.get('id'),
+ 'title': featured_video.get('name'),
+ 'url': feed,
+ 'thumbnail': featured_video.get('images'),
+ 'description': featured_video.get('description'),
+ 'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+ 'duration': parse_duration(featured_video.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
- youtube_url = YoutubeIE._extract_url(webpage)
+ for inline in (article_contents.get('inlines') or []):
+ inline_type = inline.get('type')
+ if inline_type == 'iframe':
+ iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+ if iframe_url:
+ yield self.url_result(iframe_url)
+ elif inline_type == 'video':
+ video_id = inline.get('id')
+ if video_id:
+ yield {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+ 'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+ 'description': inline.get('description'),
+ 'duration': parse_duration(inline.get('duration')),
+ 'ie_key': AbcNewsVideoIE.ie_key(),
+ }
- timestamp = None
- date_str = self._html_search_regex(
- r']+class="timestamp">([^<]+)',
- webpage, 'timestamp', fatal=False)
- if date_str:
- tz_offset = 0
- if date_str.endswith(' ET'): # Eastern Time
- tz_offset = -5
- date_str = date_str[:-3]
- date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
- for date_format in date_formats:
- try:
- timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
- except ValueError:
- continue
- if timestamp is not None:
- timestamp -= tz_offset * 3600
-
- entry = {
- '_type': 'url_transparent',
- 'ie_key': AbcNewsVideoIE.ie_key(),
- 'url': full_video_url,
- 'id': video_id,
- 'display_id': display_id,
- 'timestamp': timestamp,
- }
-
- if youtube_url:
- entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
- return self.playlist_result(entries)
-
- return entry
+ return self.playlist_result(
+ entries(), story_id, article_contents.get('headline'),
+ article_contents.get('subHead'))
diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py
index d611ee237..5ff419f19 100644
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dl/extractor/adn.py
@@ -26,34 +26,42 @@ from ..utils import (
strip_or_none,
try_get,
unified_strdate,
+ urlencode_postdata,
)
class ADNIE(InfoExtractor):
- IE_DESC = 'Anime Digital Network'
- _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P\d+)'
- _TEST = {
- 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
- 'md5': '0319c99885ff5547565cacb4f3f9348d',
+ IE_DESC = 'Animation Digital Network'
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P\d+)'
+ _TESTS = [{
+ 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+ 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
- 'id': '7778',
+ 'id': '9841',
'ext': 'mp4',
- 'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
- 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
- 'series': 'Blue Exorcist - Kyôto Saga',
- 'duration': 1467,
- 'release_date': '20170106',
+ 'title': 'Fruits Basket - Episode 1',
+ 'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
+ 'series': 'Fruits Basket',
+ 'duration': 1437,
+ 'release_date': '20190405',
'comment_count': int,
'average_rating': float,
- 'season_number': 2,
- 'episode': 'Début des hostilités',
+ 'season_number': 1,
+ 'episode': 'À ce soir !',
'episode_number': 1,
- }
- }
+ },
+ 'skip': 'Only available in region (FR, ...)',
+ }, {
+ 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'only_matching': True,
+ }]
- _BASE_URL = 'http://animedigitalnetwork.fr'
- _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+ _NETRC_MACHINE = 'animationdigitalnetwork'
+ _BASE = 'animationdigitalnetwork.fr'
+ _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+ _HEADERS = {}
+ _LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
@@ -78,14 +86,14 @@ class ADNIE(InfoExtractor):
if subtitle_location:
enc_subtitles = self._download_webpage(
subtitle_location, video_id, 'Downloading subtitles data',
- fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
+ fatal=False, headers={'Origin': 'https://' + self._BASE})
if not enc_subtitles:
return None
- # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+ # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
+ bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
@@ -129,19 +137,43 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}])
return subtitles
+ def _real_initialize(self):
+ username, password = self._get_login_info()
+ if not username:
+ return
+ try:
+ url = self._API_BASE_URL + 'authentication/login'
+ access_token = (self._download_json(
+ url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+ data=urlencode_postdata({
+ 'password': password,
+ 'rememberMe': False,
+ 'source': 'Web',
+ 'username': username,
+ })) or {}).get('accessToken')
+ if access_token:
+ self._HEADERS = {'authorization': 'Bearer ' + access_token}
+ except ExtractorError as e:
+ message = None
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(
+ self._webpage_read_content(e.cause, url, username),
+ username, fatal=False) or {}
+ message = resp.get('message') or resp.get('code')
+ self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
def _real_extract(self, url):
video_id = self._match_id(url)
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
player = self._download_json(
video_base_url + 'configuration', video_id,
- 'Downloading player config JSON metadata')['player']
+ 'Downloading player config JSON metadata',
+ headers=self._HEADERS)['player']
options = player['options']
user = options['user']
if not user.get('hasAccess'):
- raise ExtractorError(
- 'This video is only available for paying users', expected=True)
- # self.raise_login_required() # FIXME: Login is not implemented
+ self.raise_login_required()
token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@@ -184,12 +216,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
# This usually goes away with a different random pkcs1pad, so retry
continue
- error = self._parse_json(e.cause.read(), video_id)
+ error = self._parse_json(
+ self._webpage_read_content(e.cause, links_url, video_id),
+ video_id, fatal=False) or {}
message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message)
- else:
- raise ExtractorError(message)
+ raise ExtractorError(message)
else:
raise ExtractorError('Giving up retrying')
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index a5d88ebbe..59fbe048a 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -8,6 +8,8 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
+ remove_start,
+ traverse_obj,
update_url_query,
urlencode_postdata,
)
@@ -20,8 +22,8 @@ class AENetworksBaseIE(ThePlatformIE):
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/'''
- _THEPLATFORM_KEY = 'crazyjava'
- _THEPLATFORM_SECRET = 's3cr3t'
+ _THEPLATFORM_KEY = '43jXaGRQud'
+ _THEPLATFORM_SECRET = 'S10BPXHMlb'
_DOMAIN_MAP = {
'history.com': ('HISTORY', 'history'),
'aetv.com': ('AETV', 'aetv'),
@@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
}
def _extract_aen_smil(self, smil_url, video_id, auth=None):
- query = {'mbr': 'true'}
+ query = {
+ 'mbr': 'true',
+ 'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+ }
if auth:
query['auth'] = auth
TP_SMIL_QUERY = [{
'assetTypes': 'high_video_ak',
- 'switch': 'hls_high_ak'
+ 'switch': 'hls_high_ak',
}, {
- 'assetTypes': 'high_video_s3'
+ 'assetTypes': 'high_video_s3',
}, {
'assetTypes': 'high_video_s3',
'switch': 'hls_high_fastly',
@@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
- filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+ filter_value, query={'filter[%s]' % filter_key: filter_value})
+ result = traverse_obj(
+ result, ('results',
+ lambda k, v: k == 0 and v[filter_key] == filter_value),
+ get_all=False)
+ if not result:
+ raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+ video_id=remove_start(filter_value, '/'))
title = result['title']
video_id = result['id']
media_url = result['publicUrl']
@@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
- 'skip': 'This video is only available for users of participating TV providers.',
+ 'skip': 'Geo-restricted - This content is not available in your location.'
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': {
@@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
+ 'skip': 'This video is only available for users of participating TV providers.',
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
@@ -252,7 +265,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
_TESTS = [{
'url': 'http://www.history.com/shows/ancient-aliens',
'info_dict': {
- 'id': 'SH012427480000',
+ 'id': 'SERIES1574',
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index 6f241e683..9722fe9ac 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
'id': '2800002704436634',
'ext': 'mp4',
'title': 'CASIMA7.22',
- 'thumbnail': r're:http://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'uploader': 'CASIMA Official Store',
'timestamp': 1500717600,
'upload_date': '20170722',
diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py
index c68be3134..c4f915a3c 100644
--- a/youtube_dl/extractor/aljazeera.py
+++ b/youtube_dl/extractor/aljazeera.py
@@ -1,13 +1,16 @@
from __future__ import unicode_literals
+import json
+import re
+
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P[^/]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?Pprogram/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?]+)'
_TESTS = [{
- 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+ 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
'info_dict': {
'id': '3792260579001',
'ext': 'mp4',
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}, {
- 'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+ 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
'only_matching': True,
}]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
- program_name = self._match_id(url)
- webpage = self._download_webpage(url, program_name)
- brightcove_id = self._search_regex(
- r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+ post_type, name = re.match(self._VALID_URL, url).groups()
+ post_type = {
+ 'features': 'post',
+ 'program': 'episode',
+ 'videos': 'video',
+ }[post_type.split('/')[0]]
+ video = self._download_json(
+ 'https://www.aljazeera.com/graphql', name, query={
+ 'operationName': 'SingleArticleQuery',
+ 'variables': json.dumps({
+ 'name': name,
+ 'postType': post_type,
+ }),
+ }, headers={
+ 'wp-site': 'aje',
+ })['data']['article']['video']
+ video_id = video['id']
+ account_id = video.get('accountId') or '665003303001'
+ player_id = video.get('playerId') or 'BkeSH5BDb'
+ return self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+ 'BrightcoveNew', video_id)
diff --git a/youtube_dl/extractor/alsace20tv.py b/youtube_dl/extractor/alsace20tv.py
new file mode 100644
index 000000000..228cec3ec
--- /dev/null
+++ b/youtube_dl/extractor/alsace20tv.py
@@ -0,0 +1,89 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ get_element_by_class,
+ int_or_none,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class Alsace20TVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P[\w]+)'
+ _TESTS = [{
+ 'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
+ # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': 'lyNHCXpYJh',
+ 'ext': 'mp4',
+ 'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
+ 'title': 'Votre JT du jeudi 3 février',
+ 'upload_date': '20220203',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'duration': 1073,
+ 'view_count': int,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _extract_video(self, video_id, url=None):
+ info = self._download_json(
+ 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
+ video_id) or {}
+ title = info['titre']
+
+ formats = []
+ for res, fmt_url in (info.get('files') or {}).items():
+ formats.extend(
+ self._extract_smil_formats(fmt_url, video_id, fatal=False)
+ if '/smil:_' in fmt_url
+ else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
+ self._sort_formats(formats)
+
+ webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
+ thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
+ upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
+ upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': clean_html(get_element_by_class('wysiwyg', webpage)),
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
+ 'view_count': int_or_none(info.get('nb_vues')),
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video(video_id, url)
+
+
+class Alsace20TVEmbedIE(Alsace20TVIE):
+ _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P[\w]+)'
+ _TESTS = [{
+ 'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
+ # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
+ 'info_dict': {
+ 'id': 'lyNHCXpYJh',
+ 'ext': 'mp4',
+ 'title': 'Votre JT du jeudi 3 février',
+ 'upload_date': '20220203',
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'view_count': int,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_video(video_id)
diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
index e20f00fc3..08d3604e9 100644
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
@@ -1,18 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
+ int_or_none,
try_get,
unified_strdate,
+ unified_timestamp,
)
class AmericasTestKitchenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?Pepisode|videos)/(?P\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?Pepisode|videos)/(?P\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -20,15 +23,46 @@ class AmericasTestKitchenIE(InfoExtractor):
'id': '5b400b9ee338f922cb06450c',
'title': 'Japanese Suppers',
'ext': 'mp4',
+ 'display_id': 'weeknight-japanese-suppers',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
- 'thumbnail': r're:^https?://',
- 'timestamp': 1523664000,
- 'upload_date': '20180414',
- 'release_date': '20180410',
+ 'timestamp': 1523304000,
+ 'upload_date': '20180409',
+ 'release_date': '20180409',
'series': "America's Test Kitchen",
+ 'season': 'Season 18',
'season_number': 18,
'episode': 'Japanese Suppers',
'episode_number': 15,
+ 'duration': 1376,
+ 'thumbnail': r're:^https?://',
+ 'average_rating': 0,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+ 'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+ 'md5': '06451608c57651e985a498e69cec17e5',
+ 'info_dict': {
+ 'id': '5fbe8c61bda2010001c6763b',
+ 'title': 'Simple Chicken Dinner',
+ 'ext': 'mp4',
+ 'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
+ 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+ 'timestamp': 1610737200,
+ 'upload_date': '20210115',
+ 'release_date': '20210115',
+ 'series': "America's Test Kitchen",
+ 'season': 'Season 21',
+ 'season_number': 21,
+ 'episode': 'Simple Chicken Dinner',
+ 'episode_number': 3,
+ 'duration': 1397,
+ 'thumbnail': r're:^https?://',
+ 'view_count': int,
+ 'average_rating': 0,
},
'params': {
'skip_download': True,
@@ -36,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+ 'only_matching': True,
}, {
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
'only_matching': True,
@@ -60,7 +100,121 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
+ 'timestamp': unified_timestamp(video.get('publishDate')),
'release_date': unified_strdate(video.get('publishDate')),
+ 'episode_number': int_or_none(episode.get('number')),
+ 'season_number': int_or_none(episode.get('season')),
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?Pamericastestkitchen|(?Pcooks(?:country|illustrated)))\.com(?:(?:/(?Pcooks(?:country|illustrated)))?(?:/?$|(?\d+)))'
+ _TESTS = [{
+ # ATK Season
+ 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+ 'info_dict': {
+ 'id': 'season_1',
+ 'title': 'Season 1',
+ },
+ 'playlist_count': 13,
+ }, {
+ # Cooks Country Season
+ 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
+ 'info_dict': {
+ 'id': 'season_12',
+ 'title': 'Season 12',
+ },
+ 'playlist_count': 13,
+ }, {
+ # America's Test Kitchen Series
+ 'url': 'https://www.americastestkitchen.com/',
+ 'info_dict': {
+ 'id': 'americastestkitchen',
+ 'title': 'America\'s Test Kitchen',
+ },
+ 'playlist_count': 558,
+ }, {
+ # Cooks Country Series
+ 'url': 'https://www.americastestkitchen.com/cookscountry',
+ 'info_dict': {
+ 'id': 'cookscountry',
+ 'title': 'Cook\'s Country',
+ },
+ 'playlist_count': 199,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cookscountry/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cooksillustrated.com',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ match = re.match(self._VALID_URL, url).groupdict()
+ show = match.get('show2')
+ show_path = ('/' + show) if show else ''
+ show = show or match['show']
+ season_number = int_or_none(match.get('season'))
+
+ slug, title = {
+ 'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+ 'cookscountry': ('cco', 'Cook\'s Country'),
+ 'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+ }[show]
+
+ facet_filters = [
+ 'search_document_klass:episode',
+ 'search_show_slug:' + slug,
+ ]
+
+ if season_number:
+ playlist_id = 'season_%d' % season_number
+ playlist_title = 'Season %d' % season_number
+ facet_filters.append('search_season_list:' + playlist_title)
+ else:
+ playlist_id = show
+ playlist_title = title
+
+ season_search = self._download_json(
+ 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+ playlist_id, headers={
+ 'Origin': 'https://www.americastestkitchen.com',
+ 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+ 'X-Algolia-Application-Id': 'Y1FNZXUI30',
+ }, query={
+ 'facetFilters': json.dumps(facet_filters),
+ 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
+ 'attributesToHighlight': '',
+ 'hitsPerPage': 1000,
+ })
+
+ def entries():
+ for episode in (season_search.get('hits') or []):
+ search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
+ if not search_url:
+ continue
+ yield {
+ '_type': 'url',
+ 'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
+ 'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'timestamp': unified_timestamp(episode.get('search_document_date')),
+ 'season_number': season_number,
+ 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+ 'ie_key': AmericasTestKitchenIE.ie_key(),
+ }
+
+ return self.playlist_result(
+ entries(), playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
index 7ff098cfa..24c684cad 100644
--- a/youtube_dl/extractor/amp.py
+++ b/youtube_dl/extractor/amp.py
@@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
+ unified_timestamp,
url_or_none,
)
@@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
self._sort_formats(formats)
- timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+ timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index e87994a6a..f6ecb8438 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .yahoo import YahooIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@ -15,9 +15,9 @@ from ..utils import (
)
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
IE_NAME = 'aol.com'
- _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P[0-9a-f]+)'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
_TESTS = [{
# video with 5min ID
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
+ }, {
+ # Yahoo video
+ 'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if '-' in video_id:
+ return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py
index 98ccdaa4a..cbc1c0ecb 100644
--- a/youtube_dl/extractor/apa.py
+++ b/youtube_dl/extractor/apa.py
@@ -6,25 +6,21 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
- js_to_json,
+ int_or_none,
url_or_none,
)
class APAIE(InfoExtractor):
- _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _VALID_URL = r'(?Phttps?://[^/]+\.apa\.at)/embed/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
'info_dict': {
- 'id': 'jjv85FdZ',
+ 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'ext': 'mp4',
- 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 254,
- 'timestamp': 1519211149,
- 'upload_date': '20180221',
},
}, {
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id, base_url = mobj.group('id', 'base_url')
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ '%s/player/%s' % (base_url, video_id), video_id)
jwplatform_id = self._search_regex(
r'media[iI]d\s*:\s*["\'](?P[a-zA-Z0-9]{8})', webpage,
@@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
'jwplatform:' + jwplatform_id, ie='JWPlatform',
video_id=video_id)
- sources = self._parse_json(
- self._search_regex(
- r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
- video_id, transform_source=js_to_json)
+ def extract(field, name=None):
+ return self._search_regex(
+ r'\b%s["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % field,
+ webpage, name or field, default=None, group='value')
+
+ title = extract('title') or video_id
+ description = extract('description')
+ thumbnail = extract('poster', 'thumbnail')
formats = []
- for source in sources:
- if not isinstance(source, dict):
- continue
- source_url = url_or_none(source.get('file'))
+ for format_id in ('hls', 'progressive'):
+ source_url = url_or_none(extract(format_id))
if not source_url:
continue
ext = determine_ext(source_url)
@@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)\.mp4', source_url, 'height', default=None))
formats.append({
'url': source_url,
+ 'format_id': format_id,
+ 'height': height,
})
self._sort_formats(formats)
- thumbnail = self._search_regex(
- r'image\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage,
- 'thumbnail', fatal=False, group='url')
-
return {
'id': video_id,
- 'title': video_id,
+ 'title': title,
+ 'description': description,
'thumbnail': thumbnail,
'formats': formats,
}
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py
index a84b8b1eb..494f8330c 100644
--- a/youtube_dl/extractor/appleconnect.py
+++ b/youtube_dl/extractor/appleconnect.py
@@ -9,10 +9,10 @@ from ..utils import (
class AppleConnectIE(InfoExtractor):
- _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P[\w-]+)'
- _TEST = {
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P[\w-]+)'
+ _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
- 'md5': 'e7c38568a01ea45402570e6029206723',
+ 'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v',
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710',
'timestamp': 1436545535,
},
- }
+ }, {
+ 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
- like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
index 95758fece..95e0f663c 100644
--- a/youtube_dl/extractor/applepodcasts.py
+++ b/youtube_dl/extractor/applepodcasts.py
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
+ clean_html,
clean_podcast_url,
+ get_element_by_class,
int_or_none,
+ parse_codecs,
parse_iso8601,
try_get,
)
@@ -14,16 +17,17 @@ class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P\d+)'
_TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
- 'md5': 'df02e6acb11c10e844946a39e7222b08',
+ 'md5': '41dc31cd650143e530d9423b6b5a344f',
'info_dict': {
'id': '1000482637777',
'ext': 'mp3',
'title': '207 - Whitney Webb Returns',
- 'description': 'md5:13a73bade02d2e43737751e3987e1399',
+ 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705',
- 'timestamp': 1593921600,
- 'duration': 6425,
+ 'timestamp': 1593932400,
+ 'duration': 6454,
'series': 'The Tim Dillon Show',
+ 'thumbnail': 're:.+[.](png|jpe?g|webp)',
}
}, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
@@ -39,18 +43,40 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
- ember_data = self._parse_json(self._search_regex(
- r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
- webpage, 'ember data'), episode_id)
- episode = ember_data['data']['attributes']
+ episode_data = {}
+ ember_data = {}
+ # new page type 2021-11
+ amp_data = self._parse_json(self._search_regex(
+ r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
+ webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
+ amp_data = try_get(amp_data,
+ lambda a: self._parse_json(
+ next(a[x] for x in iter(a) if episode_id in x),
+ episode_id),
+ dict) or {}
+ amp_data = amp_data.get('d') or []
+ episode_data = try_get(
+ amp_data,
+ lambda a: next(x for x in a
+ if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
+ dict)
+ if not episode_data:
+ # try pre 2021-11 page type: TODO: consider deleting if no longer used
+ ember_data = self._parse_json(self._search_regex(
+ r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
+ webpage, 'ember data'), episode_id) or {}
+ ember_data = ember_data.get(episode_id) or ember_data
+ episode_data = try_get(ember_data, lambda x: x['data'], dict)
+ episode = episode_data['attributes']
description = episode.get('description') or {}
series = None
- for inc in (ember_data.get('included') or []):
+ for inc in (amp_data or ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
+ series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
- return {
+ info = [{
'id': episode_id,
'title': episode['name'],
'url': clean_podcast_url(episode['assetUrl']),
@@ -58,4 +84,10 @@ class ApplePodcastsIE(InfoExtractor):
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
'series': series,
- }
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }]
+ self._sort_formats(info)
+ info = info[0]
+ codecs = parse_codecs(info.get('ext', 'mp3'))
+ info.update(codecs)
+ return info
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index c79c58e82..e42ed5e79 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -2,15 +2,17 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
clean_html,
+ extract_attributes,
+ unified_strdate,
+ unified_timestamp,
)
class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos'
- _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P[^/?#]+)(?:[?].*)?$'
+ _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P[^/?#&]+)'
_TESTS = [{
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'md5': '8af1d4cf447933ed3c7f4871162602db',
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
'ext': 'ogg',
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
- 'upload_date': '19681210',
- 'uploader': 'SRI International'
+ 'creator': 'SRI International',
+ 'release_date': '19681210',
+ 'uploader': 'SRI International',
+ 'timestamp': 1268695290,
+ 'upload_date': '20100315',
}
}, {
'url': 'https://archive.org/details/Cops1922',
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
'id': 'Cops1922',
'ext': 'mp4',
'title': 'Buster Keaton\'s "Cops" (1922)',
- 'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
+ 'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+ 'timestamp': 1387699629,
+ 'upload_date': '20131222',
}
}, {
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'only_matching': True,
+ }, {
+ 'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://archive.org/embed/' + video_id, video_id)
- jwplayer_playlist = self._parse_json(self._search_regex(
- r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
- webpage, 'jwplayer playlist'), video_id)
- info = self._parse_jwplayer_data(
- {'playlist': jwplayer_playlist}, video_id, base_url=url)
+
+ playlist = None
+ play8 = self._search_regex(
+ r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
+ 'playlist', default=None)
+ if play8:
+ attrs = extract_attributes(play8)
+ playlist = attrs.get('value')
+ if not playlist:
+ # Old jwplayer fallback
+ playlist = self._search_regex(
+ r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
+ webpage, 'jwplayer playlist', default='[]')
+ jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
+ if jwplayer_playlist:
+ info = self._parse_jwplayer_data(
+ {'playlist': jwplayer_playlist}, video_id, base_url=url)
+ else:
+ # HTML5 media fallback
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ info['id'] = video_id
def get_optional(metadata, field):
return metadata.get(field, [None])[0]
@@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
'description': clean_html(get_optional(metadata, 'description')),
})
if info.get('_type') != 'playlist':
+ creator = get_optional(metadata, 'creator')
info.update({
- 'uploader': get_optional(metadata, 'creator'),
- 'upload_date': unified_strdate(get_optional(metadata, 'date')),
+ 'creator': creator,
+ 'release_date': unified_strdate(get_optional(metadata, 'date')),
+ 'uploader': get_optional(metadata, 'publisher') or creator,
+ 'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
+ 'language': get_optional(metadata, 'language'),
})
return info
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6bf5b3f13..a5b1f54d5 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor):
- _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P[^/?#]+)-(?:video-?)?(?P[0-9]+))\.html'
+ _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html'
_TESTS = [{
# available till 7.01.2022
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
- 'display_id': 'maischberger-die-woche',
- 'id': '100',
+ 'id': 'maischberger-die-woche-video100',
+ 'display_id': 'maischberger-die-woche-video100',
'ext': 'mp4',
'duration': 3687.0,
'title': 'maischberger. die woche vom 7. Januar 2021',
@@ -264,16 +264,25 @@ class ARDIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
- 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
+ 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
'only_matching': True,
}, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
+ display_id = mobj.group('id')
player_url = mobj.group('mainurl') + '~playerXml.xml'
doc = self._download_xml(player_url, display_id)
@@ -284,26 +293,63 @@ class ARDIE(InfoExtractor):
formats = []
for a in video_node.findall('.//asset'):
+ file_name = xpath_text(a, './fileName', default=None)
+ if not file_name:
+ continue
+ format_type = a.attrib.get('type')
+ format_url = url_or_none(file_name)
+ if format_url:
+ ext = determine_ext(file_name)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_type or 'hls', fatal=False))
+ continue
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}),
+ display_id, f4m_id=format_type or 'hds', fatal=False))
+ continue
f = {
- 'format_id': a.attrib['type'],
- 'width': int_or_none(a.find('./frameWidth').text),
- 'height': int_or_none(a.find('./frameHeight').text),
- 'vbr': int_or_none(a.find('./bitrateVideo').text),
- 'abr': int_or_none(a.find('./bitrateAudio').text),
- 'vcodec': a.find('./codecVideo').text,
- 'tbr': int_or_none(a.find('./totalBitrate').text),
+ 'format_id': format_type,
+ 'width': int_or_none(xpath_text(a, './frameWidth')),
+ 'height': int_or_none(xpath_text(a, './frameHeight')),
+ 'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
+ 'abr': int_or_none(xpath_text(a, './bitrateAudio')),
+ 'vcodec': xpath_text(a, './codecVideo'),
+ 'tbr': int_or_none(xpath_text(a, './totalBitrate')),
}
- if a.find('./serverPrefix').text:
- f['url'] = a.find('./serverPrefix').text
- f['playpath'] = a.find('./fileName').text
+ server_prefix = xpath_text(a, './serverPrefix', default=None)
+ if server_prefix:
+ f.update({
+ 'url': server_prefix,
+ 'playpath': file_name,
+ })
else:
- f['url'] = a.find('./fileName').text
+ if not format_url:
+ continue
+ f['url'] = format_url
formats.append(f)
self._sort_formats(formats)
+ _SUB_FORMATS = (
+ ('./dataTimedText', 'ttml'),
+ ('./dataTimedTextNoOffset', 'ttml'),
+ ('./dataTimedTextVtt', 'vtt'),
+ )
+
+ subtitles = {}
+ for subsel, subext in _SUB_FORMATS:
+ for node in video_node.findall(subsel):
+ subtitles.setdefault('de', []).append({
+ 'url': node.attrib['url'],
+ 'ext': subext,
+ })
+
return {
- 'id': mobj.group('id'),
+ 'id': xpath_text(video_node, './videoId', default=display_id),
'formats': formats,
+ 'subtitles': subtitles,
'display_id': display_id,
'title': video_node.find('./title').text,
'duration': parse_duration(video_node.find('./duration').text),
@@ -313,7 +359,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(ARDMediathekBaseIE):
- _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P[^/]+)/(?:player|live|video)/(?P(?:[^/]+/)*)(?P[a-zA-Z0-9]+)'
+ _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?PY3JpZDovL[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
@@ -343,22 +389,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
}, {
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
- display_id = mobj.group('display_id')
- if display_id:
- display_id = display_id.rstrip('/')
- if not display_id:
- display_id = video_id
+ video_id = self._match_id(url)
player_page = self._download_json(
'https://api.ardmediathek.de/public-gateway',
- display_id, data=json.dumps({
+ video_id, data=json.dumps({
'query': '''{
- playerPage(client:"%s", clipId: "%s") {
+ playerPage(client: "ard", clipId: "%s") {
blockedByFsk
broadcastedOn
maturityContentRating
@@ -388,7 +434,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
}
}
}
-}''' % (mobj.group('client'), video_id),
+}''' % video_id,
}).encode(), headers={
'Content-Type': 'application/json'
})['data']['playerPage']
@@ -413,7 +459,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
info.update({
'age_limit': age_limit,
- 'display_id': display_id,
'title': title,
'description': description,
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
diff --git a/youtube_dl/extractor/arnes.py b/youtube_dl/extractor/arnes.py
new file mode 100644
index 000000000..c0032fcab
--- /dev/null
+++ b/youtube_dl/extractor/arnes.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+ IE_NAME = 'video.arnes.si'
+ IE_DESC = 'Arnes Video'
+ _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P[0-9a-zA-Z]{12})'
+ _TESTS = [{
+ 'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+ 'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+ 'info_dict': {
+ 'id': 'a1qrWTOQfVoU',
+ 'ext': 'mp4',
+ 'title': 'Linearna neodvisnost, definicija',
+ 'description': 'Linearna neodvisnost, definicija',
+ 'license': 'PRIVATE',
+ 'creator': 'Polona Oblak',
+ 'timestamp': 1585063725,
+ 'upload_date': '20200324',
+ 'channel': 'Polona Oblak',
+ 'channel_id': 'q6pc04hw24cj',
+ 'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+ 'duration': 596.75,
+ 'view_count': int,
+ 'tags': ['linearna_algebra'],
+ 'start_time': 10,
+ }
+ }, {
+ 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+ 'only_matching': True,
+ }]
+ _BASE_URL = 'https://video.arnes.si'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+ title = video['title']
+
+ formats = []
+ for media in (video.get('media') or []):
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ formats.append({
+ 'url': self._BASE_URL + media_url,
+ 'format_id': remove_start(media.get('format'), 'FORMAT_'),
+ 'format_note': media.get('formatTranslation'),
+ 'width': int_or_none(media.get('width')),
+ 'height': int_or_none(media.get('height')),
+ })
+ self._sort_formats(formats)
+
+ channel = video.get('channel') or {}
+ channel_id = channel.get('url')
+ thumbnail = video.get('thumbnailUrl')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': self._BASE_URL + thumbnail,
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'creator': video.get('author'),
+ 'timestamp': parse_iso8601(video.get('creationTime')),
+ 'channel': channel.get('name'),
+ 'channel_id': channel_id,
+ 'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+ 'duration': float_or_none(video.get('duration'), 1000),
+ 'view_count': int_or_none(video.get('views')),
+ 'tags': video.get('hashtags'),
+ 'start_time': int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+ }
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 03abdbfaf..5bfe57b10 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -12,6 +12,7 @@ from ..utils import (
ExtractorError,
int_or_none,
qualities,
+ strip_or_none,
try_get,
unified_strdate,
url_or_none,
@@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
return self.playlist_result(entries, playlist_id, title, description)
+
+
+class ArteTVCategoryIE(ArteTVBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+ _TESTS = [{
+ 'url': 'https://www.arte.tv/en/videos/politics-and-society/',
+ 'info_dict': {
+ 'id': 'politics-and-society',
+ 'title': 'Politics and society',
+ 'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+ },
+ 'playlist_mincount': 13,
+ },
+ ]
+
+ @classmethod
+ def suitable(cls, url):
+ return (
+ not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+ and super(ArteTVCategoryIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ lang, playlist_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, playlist_id)
+
+ items = []
+ for video in re.finditer(
+ r']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+ webpage):
+ video = video.group('url')
+ if video == url:
+ continue
+ if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+ items.append(video)
+
+ if items:
+ title = (self._og_search_title(webpage, default=None)
+ or self._html_search_regex(r']*>([^<]+)', default=None))
+ title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
+
+ result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
+ if result:
+ description = self._og_search_description(webpage, default=None)
+ if description:
+ result['description'] = description
+ return result
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index cc7771354..4d1fbad1f 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -14,7 +14,7 @@ from ..utils import (
class AudiomackIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P[\w/-]+)'
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P[\w/-]+)'
IE_NAME = 'audiomack'
_TESTS = [
# hosted on audiomack
@@ -29,25 +29,27 @@ class AudiomackIE(InfoExtractor):
}
},
# audiomack wrapper around soundcloud song
+ # Needs new test URL.
{
'add_ie': ['Soundcloud'],
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
- 'info_dict': {
- 'id': '258901379',
- 'ext': 'mp3',
- 'description': 'mamba day freestyle for the legend Kobe Bryant ',
- 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
- 'uploader': 'ILOVEMAKONNEN',
- 'upload_date': '20160414',
- }
+ 'only_matching': True,
+ # 'info_dict': {
+ # 'id': '258901379',
+ # 'ext': 'mp3',
+ # 'description': 'mamba day freestyle for the legend Kobe Bryant ',
+ # 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
+ # 'uploader': 'ILOVEMAKONNEN',
+ # 'upload_date': '20160414',
+ # }
},
]
def _real_extract(self, url):
- # URLs end with [uploader name]/[uploader title]
+ # URLs end with [uploader name]/song/[uploader title]
# this title is whatever the user types in, and is rarely
# the proper song title. Real metadata is in the api response
- album_url_tag = self._match_id(url)
+ album_url_tag = self._match_id(url).replace('/song/', '/')
# Request the extended version of the api for extra fields like artist and title
api_response = self._download_json(
@@ -73,13 +75,13 @@ class AudiomackIE(InfoExtractor):
class AudiomackAlbumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P[\w/-]+)'
+ _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P[\w/-]+)'
IE_NAME = 'audiomack:album'
_TESTS = [
# Standard album playlist
{
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
- 'playlist_count': 15,
+ 'playlist_count': 11,
'info_dict':
{
'id': '812251',
@@ -95,24 +97,24 @@ class AudiomackAlbumIE(InfoExtractor):
},
'playlist': [{
'info_dict': {
- 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
- 'id': '837577',
+ 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
+ 'id': '837580',
'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo',
}
}],
'params': {
- 'playliststart': 9,
- 'playlistend': 9,
+ 'playliststart': 2,
+ 'playlistend': 2,
}
}
]
def _real_extract(self, url):
- # URLs end with [uploader name]/[uploader title]
+ # URLs end with [uploader name]/album/[uploader title]
# this title is whatever the user types in, and is rarely
# the proper song title. Real metadata is in the api response
- album_url_tag = self._match_id(url)
+ album_url_tag = self._match_id(url).replace('/album/', '/')
result = {'_type': 'playlist', 'entries': []}
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
# Therefore we don't know how many songs the album has and must infi-loop until failure
@@ -134,7 +136,7 @@ class AudiomackAlbumIE(InfoExtractor):
# Pull out the album metadata and add to result (if it exists)
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
if apikey in api_response and resultkey not in result:
- result[resultkey] = api_response[apikey]
+ result[resultkey] = compat_str(api_response[apikey])
song_id = url_basename(api_response['url']).rpartition('.')[0]
result['entries'].append({
'id': compat_str(api_response.get('id', song_id)),
diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py
index a2603bbff..3a7700cd4 100644
--- a/youtube_dl/extractor/awaan.py
+++ b/youtube_dl/extractor/awaan.py
@@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live,
+ 'uploader_id': video_data.get('user_id'),
}
@@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'upload_date': '20150107',
'timestamp': 1420588800,
+ 'uploader_id': '71',
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index b1e20def5..930266990 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
- _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
_PARTNER_ID = '1719221'
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/bandaichannel.py b/youtube_dl/extractor/bandaichannel.py
new file mode 100644
index 000000000..d67285913
--- /dev/null
+++ b/youtube_dl/extractor/bandaichannel.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewIE):
+ IE_NAME = 'bandaichannel'
+ _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)'
+ _TESTS = [{
+ 'url': 'https://www.b-ch.com/titles/514/001',
+ 'md5': 'a0f2d787baa5729bed71108257f613a4',
+ 'info_dict': {
+ 'id': '6128044564001',
+ 'ext': 'mp4',
+ 'title': 'メタルファイターMIKU 第1話',
+ 'timestamp': 1580354056,
+ 'uploader_id': '5797077852001',
+ 'upload_date': '20200130',
+ 'duration': 1387.733,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+ bc = self._download_json(
+ 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
+ video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
+ return self._parse_brightcove_metadata(bc, bc['id'])
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 69e673a26..006aab3b4 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
'uploader': 'Ben Prunty',
'timestamp': 1396508491,
'upload_date': '20140403',
+ 'release_timestamp': 1396483200,
'release_date': '20140403',
'duration': 260.877,
'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
'uploader': 'Mastodon',
'timestamp': 1322005399,
'upload_date': '20111122',
+ 'release_timestamp': 1076112000,
'release_date': '20040207',
'duration': 120.79,
'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
- 'release_date': unified_strdate(tralbum.get('album_release_date')),
+ 'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
'duration': duration,
'track': track,
'track_number': track_number,
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index b4daee54e..378b52f4f 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -1,37 +1,46 @@
# coding: utf-8
from __future__ import unicode_literals
+import functools
import itertools
+import json
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_etree_Element,
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+)
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
clean_html,
dict_get,
- ExtractorError,
float_or_none,
get_element_by_class,
int_or_none,
js_to_json,
parse_duration,
parse_iso8601,
+ strip_or_none,
try_get,
unescapeHTML,
+ unified_timestamp,
url_or_none,
urlencode_postdata,
urljoin,
)
-from ..compat import (
- compat_etree_Element,
- compat_HTTPError,
- compat_urlparse,
-)
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
- _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
+ _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
@@ -387,9 +396,17 @@ class BBCCoUkIE(InfoExtractor):
formats.extend(self._extract_mpd_formats(
href, programme_id, mpd_id=format_id, fatal=False))
elif transfer_format == 'hls':
- formats.extend(self._extract_m3u8_formats(
- href, programme_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False))
+ # TODO: let expected_status be passed into _extract_xxx_formats() instead
+ try:
+ fmts = self._extract_m3u8_formats(
+ href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ except ExtractorError as e:
+ if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+ and e.exc_info[1].code in (403, 404)):
+ raise
+ fmts = []
+ formats.extend(fmts)
elif transfer_format == 'hds':
formats.extend(self._extract_f4m_formats(
href, programme_id, f4m_id=format_id, fatal=False))
@@ -756,23 +773,44 @@ class BBCIE(BBCCoUkIE):
'only_matching': True,
}, {
# custom redirection to www.bbc.com
+ # also, video with window.__INITIAL_DATA__
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
- 'only_matching': True,
+ 'info_dict': {
+ 'id': 'p02xzws1',
+ 'ext': 'mp4',
+ 'title': "Pluto may have 'nitrogen glaciers'",
+ 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1437785037,
+ 'upload_date': '20150725',
+ },
+ }, {
+ # video with window.__INITIAL_DATA__ and value as JSON string
+ 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
+ 'info_dict': {
+ 'id': 'p0b71qth',
+ 'ext': 'mp4',
+ 'title': 'Why France is making this woman a national hero',
+ 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1638230731,
+ 'upload_date': '20211130',
+ },
}, {
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
'only_matching': True,
}, {
+ # bbcthreeConfig
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
'info_dict': {
'id': 'p06556y7',
'ext': 'mp4',
- 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
- 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+ 'title': 'Things Not To Say to people that live on council estates',
+ 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
+ 'duration': 360,
+ 'thumbnail': r're:https?://.+/.+\.jpg',
},
- 'params': {
- 'skip_download': True,
- }
}, {
# window.__PRELOADED_STATE__
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
@@ -793,11 +831,25 @@ class BBCIE(BBCCoUkIE):
'description': 'Learn English words and phrases from this story',
},
'add_ie': [BBCCoUkIE.ie_key()],
+ }, {
+ # BBC Reel
+ 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+ 'info_dict': {
+ 'id': 'p07c6sb9',
+ 'ext': 'mp4',
+ 'title': 'How positive thinking is harming your happiness',
+ 'alt_title': 'The downsides of positive thinking',
+ 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'duration': 235,
+ 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+ 'upload_date': '20190604',
+ 'categories': ['Psychology'],
+ },
}]
@classmethod
def suitable(cls, url):
- EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+ EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
else super(BBCIE, cls).suitable(url))
@@ -929,7 +981,7 @@ class BBCIE(BBCCoUkIE):
else:
entry['title'] = info['title']
entry['formats'].extend(info['formats'])
- except Exception as e:
+ except ExtractorError as e:
# Some playlist URL may fail with 500, at the same time
# the other one may work fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
@@ -980,6 +1032,37 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles,
}
+ # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+ initial_data = self._parse_json(self._html_search_regex(
+ r'', webpage, 'drupal settings'),
@@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth':
adobe_pass = settings.get('tve_adobe_auth', {})
+ if site == 'bravotv':
+ site = 'bravo'
resource = self._get_mvpd_resource(
- adobe_pass.get('adobePassResourceId', 'bravo'),
+ adobe_pass.get('adobePassResourceId') or site,
tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth(
- url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+ url, release_pid,
+ adobe_pass.get('adobePassRequestorId') or site, resource)
else:
shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid']
diff --git a/youtube_dl/extractor/caffeine.py b/youtube_dl/extractor/caffeine.py
new file mode 100644
index 000000000..bffedb9a7
--- /dev/null
+++ b/youtube_dl/extractor/caffeine.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ T,
+ traverse_obj,
+ txt_or_none,
+ urljoin,
+)
+
+
+class CaffeineTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P[0-9a-f-]+)'
+ _TESTS = [{
+ 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'info_dict': {
+ 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'ext': 'mp4',
+ 'title': 'GOOOOD MORNINNNNN #highlights',
+ 'timestamp': 1654702180,
+ 'upload_date': '20220608',
+ 'uploader': 'TsuSurf',
+ 'duration': 3145,
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://api.caffeine.tv/social/public/activity/' + video_id,
+ video_id)
+ broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {}
+ title = broadcast_info['broadcast_title']
+ video_url = broadcast_info['video_url']
+
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8',
+ fatal=False)
+ else:
+ formats = [{'url': video_url}]
+ self._sort_formats(formats)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }, traverse_obj(json_data, {
+ 'uploader': ((None, 'user'), 'username'),
+ }, get_all=False), traverse_obj(json_data, {
+ 'like_count': ('like_count', T(int_or_none)),
+ 'view_count': ('view_count', T(int_or_none)),
+ 'comment_count': ('comment_count', T(int_or_none)),
+ 'tags': ('tags', Ellipsis, T(txt_or_none)),
+ 'is_live': 'is_live',
+ 'uploader': ('user', 'name'),
+ }), traverse_obj(broadcast_info, {
+ 'duration': ('content_duration', T(int_or_none)),
+ 'timestamp': ('broadcast_start_time', T(parse_iso8601)),
+ 'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))),
+ 'age_limit': ('content_rating', T(lambda r: r and {
+ # assume Apple Store ratings [1]
+ # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
+ 'FOUR_PLUS': 0,
+ 'NINE_PLUS': 9,
+ 'TWELVE_PLUS': 12,
+ 'SEVENTEEN_PLUS': 17,
+ }.get(r, 17))),
+ }))
diff --git a/youtube_dl/extractor/callin.py b/youtube_dl/extractor/callin.py
new file mode 100644
index 000000000..341be479f
--- /dev/null
+++ b/youtube_dl/extractor/callin.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ try_get,
+)
+
+
+class CallinIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P[^/#?-]+)'
+ _TESTS = [{
+ 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
+ 'md5': '14ede27ee2c957b7e4db93140fc0745c',
+ 'info_dict': {
+ 'id': 'PrumRdSQJW',
+ 'ext': 'mp4',
+ 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
+ 'description': 'Or, why the government doesn’t like SpaceX',
+ 'channel': 'The Pull Request',
+ 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
+ }
+ }, {
+ 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
+ 'md5': '16f704ddbf82a27e3930533b12062f07',
+ 'info_dict': {
+ 'id': 'lzxMidUnjA',
+ 'ext': 'mp4',
+ 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
+ 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
+ 'channel': 'The DEBRIEF With Briahna Joy Gray',
+ 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
+ }
+ }]
+
+ def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)',
+ webpage, 'next.js data', fatal=fatal, **kw),
+ video_id, transform_source=transform_source, fatal=fatal)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ next_data = self._search_nextjs_data(webpage, video_id)
+ episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
+ if not episode:
+ raise ExtractorError('Failed to find episode data')
+
+ title = episode.get('title') or self._og_search_title(webpage)
+ description = episode.get('description') or self._og_search_description(webpage)
+
+ formats = []
+ formats.extend(self._extract_m3u8_formats(
+ episode.get('m3u8'), video_id, 'mp4',
+ entry_protocol='m3u8_native', fatal=False))
+ self._sort_formats(formats)
+
+ channel = try_get(episode, lambda x: x['show']['title'], compat_str)
+ channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'channel': channel,
+ 'channel_url': channel_url,
+ }
diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py
index 1eb81b75e..d2e860b24 100644
--- a/youtube_dl/extractor/cammodels.py
+++ b/youtube_dl/extractor/cammodels.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
int_or_none,
url_or_none,
)
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
def _real_extract(self, url):
user_id = self._match_id(url)
- webpage = self._download_webpage(
- url, user_id, headers=self.geo_verification_headers())
-
- manifest_root = self._html_search_regex(
- r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
- if not manifest_root:
- ERRORS = (
- ("I'm offline, but let's stay connected", 'This user is currently offline'),
- ('in a private show', 'This user is in a private show'),
- ('is currently performing LIVE', 'This model is currently performing live'),
- )
- for pattern, message in ERRORS:
- if pattern in webpage:
- error = message
- expected = True
- break
- else:
- error = 'Unable to find manifest URL root'
- expected = False
- raise ExtractorError(error, expected=expected)
-
manifest = self._download_json(
- '%s%s.json' % (manifest_root, user_id), user_id)
+ 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
formats = []
+ thumbnails = []
for format_id, format_dict in manifest['formats'].items():
if not isinstance(format_dict, dict):
continue
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
'preference': -1,
})
else:
+ if format_id == 'jpeg':
+ thumbnails.append({
+ 'url': f['url'],
+ 'width': f['width'],
+ 'height': f['height'],
+ 'format_id': f['format_id'],
+ })
continue
formats.append(f)
self._sort_formats(formats)
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
return {
'id': user_id,
'title': self._live_title(user_id),
+ 'thumbnails': thumbnails,
'is_live': True,
'formats': formats,
'age_limit': 18
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
index 8b76a0200..eefbab241 100644
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -7,19 +7,21 @@ from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
- extract_attributes,
ExtractorError,
- strip_or_none,
+ clean_html,
+ extract_attributes,
float_or_none,
+ get_element_by_class,
int_or_none,
merge_dicts,
str_or_none,
+ strip_or_none,
url_or_none,
)
class CanvasIE(InfoExtractor):
- _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P[^/?#&]+)'
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '68993eda72ef62386a15ea2cf3c93107',
@@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
'display_id': display_id,
'season_number': int_or_none(page.get('episode_season')),
})
+
+
+class DagelijkseKostIE(InfoExtractor):
+ IE_DESC = 'dagelijksekost.een.be'
+ _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
+ 'md5': '30bfffc323009a3e5f689bef6efa2365',
+ 'info_dict': {
+ 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
+ 'display_id': 'hachis-parmentier-met-witloof',
+ 'ext': 'mp4',
+ 'title': 'Hachis parmentier met witloof',
+ 'description': 'md5:9960478392d87f63567b5b117688cdc5',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 283.02,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ title = strip_or_none(get_element_by_class(
+ 'dish-metadata__title', webpage
+ ) or self._html_search_meta(
+ 'twitter:title', webpage))
+
+ description = clean_html(get_element_by_class(
+ 'dish-description', webpage)
+ ) or self._html_search_meta(
+ ('description', 'twitter:description', 'og:description'),
+ webpage)
+
+ video_id = self._html_search_regex(
+ r'data-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id',
+ group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ }
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index 4a19a73d2..c79e55a75 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -27,7 +27,7 @@ class CBSBaseIE(ThePlatformFeedIE):
class CBSIE(CBSBaseIE):
- _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)'
+ _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)'
_TESTS = [{
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -52,6 +52,9 @@ class CBSIE(CBSBaseIE):
}, {
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+ 'only_matching': True,
}]
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 345debcf0..1285ed65e 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
def _real_extract(self, url):
item = self._parse_json(zlib.decompress(compat_b64decode(
compat_urllib_parse_unquote(self._match_id(url))),
- -zlib.MAX_WBITS), None)['video']['items'][0]
+ -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py
index 83b764762..a891c9a55 100644
--- a/youtube_dl/extractor/cbssports.py
+++ b/youtube_dl/extractor/cbssports.py
@@ -1,38 +1,113 @@
from __future__ import unicode_literals
-from .cbs import CBSBaseIE
+import re
+
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+)
-class CBSSportsIE(CBSBaseIE):
- _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P[^/?#&]+)'
-
+# class CBSSportsEmbedIE(CBSBaseIE):
+class CBSSportsEmbedIE(InfoExtractor):
+ IE_NAME = 'cbssports:embed'
+ _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
+ (?:
+ ids%3D(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
+ pcid%3D(?P\d+)
+ )'''
_TESTS = [{
- 'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
- 'info_dict': {
- 'id': '1214315075735',
- 'ext': 'mp4',
- 'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
- 'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
- 'timestamp': 1524111457,
- 'upload_date': '20180419',
- 'uploader': 'CBSI-NEW',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
+ 'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
+ 'only_matching': True,
}, {
- 'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
+ 'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
'only_matching': True,
}]
- def _extract_video_info(self, filter_query, video_id):
- return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+ # def _extract_video_info(self, filter_query, video_id):
+ # return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+ def _real_extract(self, url):
+ uuid, pcid = re.match(self._VALID_URL, url).groups()
+ query = {'id': uuid} if uuid else {'pcid': pcid}
+ video = self._download_json(
+ 'https://www.cbssports.com/api/content/video/',
+ uuid or pcid, query=query)[0]
+ video_id = video['id']
+ title = video['title']
+ metadata = video.get('metaData') or {}
+ # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
+ # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
+
+ formats = self._extract_m3u8_formats(
+ metadata['files'][0]['url'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ self._sort_formats(formats)
+
+ image = video.get('image')
+ thumbnails = None
+ if image:
+ image_path = image.get('path')
+ if image_path:
+ thumbnails = [{
+ 'url': image_path,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ 'filesize': int_or_none(image.get('size')),
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
+ 'duration': int_or_none(metadata.get('duration')),
+ }
+
+
+class CBSSportsBaseIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
- webpage, 'video id')
- return self._extract_video_info('byId=%s' % video_id, video_id)
+ iframe_url = self._search_regex(
+ r'
' % NOT_AVAILABLE_STRING in webpage:
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+ self.raise_geo_restricted(NOT_AVAILABLE_STRING)
+ if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
+ raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
type_ = None
episode_id = None
@@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
data = {
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
- 'requestUrl': compat_urllib_parse_urlparse(url).path,
+ 'requestUrl': parsed_url.path,
'requestSource': 'iVysilani',
}
@@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):
for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request(
- 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+ 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
- playlist_title = self._og_search_title(webpage, default=None)
- playlist_description = self._og_search_description(webpage, default=None)
-
playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist:
continue
@@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
entries[num]['formats'].extend(formats)
continue
- item_id = item.get('id') or item['assetId']
+ item_id = str_or_none(item.get('id') or item['assetId'])
title = item['title']
duration = float_or_none(item.get('duration'))
@@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_len == 1:
final_title = playlist_title or title
- if is_live:
- final_title = self._live_title(final_title)
else:
final_title = '%s (%s)' % (playlist_title, title)
@@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
for e in entries:
self._sort_formats(e['formats'])
+ if len(entries) == 1:
+ return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
def _get_subtitles(self, episode_id, subs):
@@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
-
-
-class CeskaTelevizePoradyIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)'
- _TESTS = [{
- # video with 18+ caution trailer
- 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
- 'info_dict': {
- 'id': '215562210900007-bogotart',
- 'title': 'Queer: Bogotart',
- 'description': 'Alternativní průvodce současným queer světem',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '61924494876844842',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Varování 18+)',
- 'duration': 10.2,
- },
- }, {
- 'info_dict': {
- 'id': '61924494877068022',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Queer)',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 1558.3,
- },
- }],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # iframe embed
- 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- data_url = update_url_query(unescapeHTML(self._search_regex(
- (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1',
- r'